
###########################
### LOAD AND CLEAN DATA ###
###########################
### 1. Load packages and data ####
library(pacman)
p_load(readstata13, foreign, tidyverse, stargazer, stringr, 
       miceadds, ggplot2, reshape2, corrplot, ggpubr, dplyr, 
       coefplot, arules, sampleSelection, fastDummies, car,
       randomizr, estimatr, grid, gridExtra, infer, here, 
       lmtest, RItools, coin, margins, emmeans, MatchIt)


#load original data
i_am("maintext.R")
origdata<-read.dta(here("JuryDataSummer2004.dta"))

#Getting rid of non-standard jury sizes
dat<-origdata %>% filter(num_js==6) 

#Correcting jury 492
dat$ordernum<-ifelse(dat$jurynum==492,"a",dat$ordernum)
#Correcting jury 336
dat$scenario<-ifelse(dat$jurynum==336,5,dat$scenario)

#Labeling scenarios
dat$scenario<-factor(dat$scenario, labels=c("Motorcycle  1", "Circus  2", "Airbag  3", "Baldness  4", "Exercise video  5", "Childproof cap  6", "Workplace benzene  7", "Pajamas  8", "Lift-chair  9", "Computer monitor cancer  10", "Sulfur leak  11", "Train accident  12", "Escalator  13", "Parking lot abduction  14", "Trojan Yachts  15"))


#Deleting those observations w/ missing info for age, gender, race, or education 
#(losing 52 jurors):
dat<-subset.data.frame(dat,is.na(age)==F&is.na(sex)==F&is.na(white)==F&is.na(educ)==F)
#44 jurors missing income will be dealt with via dummy variable later



### 2. Remove juries with any jurors missing data ####

#Check which juries have less than 5 jurors after dropping the above jurors
toofew <- dat %>% group_by(jurynum) %>% summarize(n()) %>% filter(`n()`!=6) %>% select(jurynum)

#Drop juries with less than 6 jurors with complete info
#losing 224 jurors, 46 juries
dat <- dat %>% filter(!(jurynum %in% toofew$jurynum))
#Double check if we dropped the incomplete juries entirely.
dat %>% group_by(jurynum) %>% summarize(n()) %>% filter(`n()`!=6)

rm(toofew)

### 3. Data Cleaning, recoding, variable generation ##########

### CREATING DVs

#Scaling individual "punishment scale" decisions:
dat$scaleis<-(dat$iscale)/8
#Scaling jury-level "punishment scale" decisions:
dat$scalejr<-(dat$jryscale)/8
#Creating logged dollar amounts, adding 1 to deal with zeroes:
dat$lidoll<-log(dat$idoll + 1)
dat$ljrydoll<-log(dat$jrydoll + 1)

#create dollar amount ordinal scale 
dat <- dat %>% mutate(or_idoll = case_when(idoll==0 ~ 0,
                                           idoll%in%c(1:50000)~1,
                                           idoll%in%c(50001:100000)~2,
                                           idoll%in%c(100001:200000)~3,
                                           idoll%in%c(200001:500000-1)~4,
                                           idoll%in%c(500000:750000)~5,
                                           idoll%in%c(750001:1000000)~6,
                                           idoll%in%c(1000001:3000000)~7,
                                           idoll > 3000000 ~8,
                                           TRUE ~ NA_real_))

#rescale 0 to 1
dat$or_idoll<-dat$or_idoll/8

dat$eight.doll <- dat$or_idoll*8

#check distribution of ordinal scale dollar values
ggplot(dat, aes(x=or_idoll)) +   
  labs(title="Histogram - Ordinal Individual Dollar Punishment",x="Ordinal Scale", y = "Count")+
  geom_histogram(binwidth=0.125, color="black", fill="white") + scale_x_continuous(breaks=(0:8)/8)

## Control Variables:
#Calculating logged median pre-deliberation award, punishment, SDs:
dat <- dat %>% group_by(jurynum) %>% 
  mutate(med_or_idoll=median(or_idoll, na.rm=T),
         med_lidoll=median(lidoll, na.rm=T), 
         med_scale = median(scaleis,na.rm = T),
         sd_or_idoll = sd(or_idoll, na.rm=TRUE),
         sd_lidoll = sd(lidoll, na.rm=TRUE),
         sd_scale = sd(scaleis, na.rm=TRUE))


### Individual-level IVs used here and later:
#Female indicator:
dat$ind_female<-NA
dat$ind_female[dat$sex=="Female  2"]<-1
dat$ind_female[dat$sex=="Male  1"]<-0
#Non-white indicator:
dat$ind_nonwhite<-NA
dat$ind_nonwhite[dat$white=="Not White  0"]<-1
dat$ind_nonwhite[dat$white=="White  1"]<-0
# Hispanic indicator:
dat$ind_hisp <- ifelse(dat$ethnic=="Hispanic  4", 1, 0)
#Age indicators:
dat$ind_ageYoung<-ifelse(dat$age%in%c("18-29  1"),1,0)
dat$ind_ageMiddle<-ifelse(dat$age%in%c("30-39  2","40-49  3","50-59  4"),1,0)
dat$ind_ageOld<-ifelse(dat$age%in%c("60-69  5","70+  6"),1,0)
#Education indicators:
dat$ind_eduHS<-ifelse(dat$educ%in%c("Some high school or less  1","High school diploma  2"),1,0)
dat$ind_eduSC<-ifelse(dat$educ%in%c("Some college  3"),1,0)
dat$ind_eduCD<-ifelse(dat$educ%in%c('College diploma  4',"Some graduate school  5","Graduate degree  6"),1,0)
#Income indicators:
dat$ind_incLow<-ifelse(dat$income%in%c("$10,000 or less  1","$10,001 - $20,000  2",
                                       "$20,001 - $30,000  3"),1,0)
dat$ind_incMiddle<-ifelse(dat$income%in%c("$30,001 - $50,000  4"),1,0)
dat$ind_incHigh<-ifelse(dat$income%in%c("$50,001 - $75,000  5","over $ 75,000  6"),1,0)
dat$ind_incNA<-ifelse(is.na(dat$income)==T,1,0)


#Create jury-level medians for each gender type. 
dat <- dat %>% group_by(jurynum, ind_female) %>% mutate(gender_med_or_idoll=median(or_idoll, na.rm=T),
                                                        gender_med_scale = median(scaleis,na.rm = T))
#Create jury-level medians for each race type.
dat <- dat %>% group_by(jurynum, ind_nonwhite) %>% mutate(race_med_or_idoll =median(or_idoll, na.rm=T),
                                                          race_med_scale    =median(scaleis,na.rm = T))

#Create jury-level medians for age: young - old .
dat <- dat %>% group_by(jurynum, ind_ageOld) %>% mutate(old_med_or_idoll =median(or_idoll, na.rm=T),
                                                        old_med_scale    =median(scaleis,na.rm = T))

#Create jury-level medians for education: HS - CD .
dat <- dat %>% group_by(jurynum, ind_eduCD) %>% mutate(cd_med_or_idoll =median(or_idoll, na.rm=T),
                                                       cd_med_scale    =median(scaleis,na.rm = T))

#Create jury-level medians for income: poor - rich .
dat <- dat %>% group_by(jurynum, ind_incHigh) %>% mutate(rich_med_or_idoll =median(or_idoll, na.rm=T),
                                                         rich_med_scale    =median(scaleis,na.rm = T))

#Estimate differences between furthest-apart demographic group medians.
#missing when jury doesn't have any variation on the variable in question
dat <- dat %>% group_by(jurynum) %>% mutate(diff_race_med_or_idoll  = abs(max(race_med_or_idoll,na.rm = T)- min(race_med_or_idoll, na.rm = T)),
                                            diff_race_med_scale     = abs(max(race_med_scale ,na.rm=T)    - min(race_med_scale ,  na.rm=T)),
                                            diff_gender_med_or_idoll= abs(max(gender_med_or_idoll,na.rm=T)- min(gender_med_or_idoll ,  na.rm=T)),
                                            diff_gender_med_scale   = abs(max(gender_med_scale ,  na.rm=T)- min(gender_med_scale ,  na.rm=T)),
                                            diff_old_med_or_idoll   = abs(max(old_med_or_idoll ,  na.rm=T)- min(old_med_or_idoll ,  na.rm=T)),
                                            diff_old_med_scale      = abs(max(old_med_scale ,  na.rm=T)   - min(old_med_scale ,  na.rm=T)),
                                            diff_cd_med_or_idoll    = abs(max(cd_med_or_idoll ,  na.rm=T) - min(cd_med_or_idoll ,  na.rm=T)),
                                            diff_cd_med_scale       = abs(max(cd_med_scale ,  na.rm=T)    - min(cd_med_scale ,  na.rm=T)),
                                            diff_rich_med_or_idoll  = abs(max(rich_med_or_idoll,na.rm=T)  - min(rich_med_or_idoll ,  na.rm=T)),
                                            diff_rich_med_scale     = abs(max(rich_med_scale ,  na.rm=T)  - min(rich_med_scale ,  na.rm=T)))


###Create group-level compositional variables:
#versions including and excluding the observation's juror:

##Group-level number of women:
dat<-dat %>% group_by(jurynum) %>% mutate(totfem.inc=sum(ind_female),
                                          totfem.exc=(sum(ind_female)-ind_female))
dat$female12.inc<-ifelse(dat$totfem.inc%in%c(1,2),1,0)
dat$female3.inc<-ifelse(dat$totfem.inc%in%c(3),1,0)
dat$female4.inc<-ifelse(dat$totfem.inc%in%c(4),1,0)
dat$female56.inc<-ifelse(dat$totfem.inc%in%c(5,6),1,0)
dat$female02.exc<-ifelse(dat$totfem.exc%in%c(0,1,2),1,0)
dat$female3.exc<-ifelse(dat$totfem.exc%in%c(3),1,0)
dat$female45.exc<-ifelse(dat$totfem.exc%in%c(4,5),1,0)
prop.table(table(dat$totfem.inc))

##Group-level for number of non-white:
dat<-dat %>% group_by(jurynum) %>% mutate(totnwhite.inc=sum(ind_nonwhite),
                                          totnwhite.exc=(sum(ind_nonwhite)-ind_nonwhite))
dat$nonwhite0.inc<-ifelse(dat$totnwhite.inc%in%c(0),1,0)
dat$nonwhite1.inc<-ifelse(dat$totnwhite.inc%in%c(1),1,0)
dat$nonwhite24.inc<-ifelse(dat$totnwhite.inc%in%c(2,3,4),1,0)
dat$nonwhite0.exc<-ifelse(dat$totnwhite.exc%in%c(0),1,0)
dat$nonwhite1.exc<-ifelse(dat$totnwhite.exc%in%c(1),1,0)
dat$nonwhite24.exc<-ifelse(dat$totnwhite.exc%in%c(2,3,4),1,0)
prop.table(table(dat$totnwhite.inc))

## Group-level Hispanic:
dat <- dat %>%
  group_by(jurynum) %>%
  mutate(tothisp.inc = sum(ind_hisp),
         tothisp.exc = sum(ind_hisp) - ind_hisp)
dat$hisp0.exc <- ifelse(dat$tothisp.exc==0, 1, 0)
dat$hisp1.exc <- ifelse(dat$tothisp.exc==1, 1, 0)
dat$hisp23.exc <- ifelse(dat$tothisp.exc>1, 1, 0)
dat$hisp0.inc <- ifelse(dat$tothisp.inc==0, 1, 0)
dat$hisp1.inc <- ifelse(dat$tothisp.inc==1, 1, 0)
dat$hisp23.inc <- ifelse(dat$tothisp.inc>1, 1, 0)


##Group-level for number young:
dat<-dat %>% group_by(jurynum) %>% mutate(totyoung.inc=sum(ind_ageYoung),
                                          totyoung.exc=(sum(ind_ageYoung)-ind_ageYoung))
dat$young0.inc<-ifelse(dat$totyoung.inc%in%c(0),1,0)
dat$young1.inc<-ifelse(dat$totyoung.inc%in%c(1),1,0)
dat$young2.inc<-ifelse(dat$totyoung.inc%in%c(2),1,0)
dat$young34.inc<-ifelse(dat$totyoung.inc%in%c(3,4),1,0)
dat$young0.exc<-ifelse(dat$totyoung.exc%in%c(0),1,0)
dat$young1.exc<-ifelse(dat$totyoung.exc%in%c(1),1,0)
dat$young24.exc<-ifelse(dat$totyoung.exc%in%c(2,3,4),1,0)
prop.table(table(dat$totyoung.inc))

##Group-level for number old:
dat<-dat %>% group_by(jurynum) %>% mutate(totold.inc=sum(ind_ageOld),
                                          totold.exc=(sum(ind_ageOld)-ind_ageOld))
dat$old0.inc<-ifelse(dat$totold.inc%in%c(0),1,0)
dat$old1.inc<-ifelse(dat$totold.inc%in%c(1),1,0)
dat$old2.inc<-ifelse(dat$totold.inc%in%c(2),1,0)
dat$old35.inc<-ifelse(dat$totold.inc%in%c(3,4,5),1,0)
dat$old0.exc<-ifelse(dat$totold.exc%in%c(0),1,0)
dat$old1.exc<-ifelse(dat$totold.exc%in%c(1),1,0)
dat$old2.exc<-ifelse(dat$totold.exc%in%c(2),1,0)
dat$old35.exc<-ifelse(dat$totold.exc%in%c(3,4,5),1,0)
prop.table(table(dat$totyoung.inc))

##Group-level for number w/ hs educ or less
dat<-dat %>% group_by(jurynum) %>% mutate(toths.inc=sum(ind_eduHS),
                                          toths.exc=(sum(ind_eduHS)-(ind_eduHS)))
dat$hs0.inc<-ifelse(dat$toths.inc%in%c(0),1,0)
dat$hs1.inc<-ifelse(dat$toths.inc%in%c(1),1,0)
dat$hs2.inc<-ifelse(dat$toths.inc%in%c(2),1,0)
dat$hs35.inc<-ifelse(dat$toths.inc%in%c(3,4,5),1,0)
dat$hs0.exc<-ifelse(dat$toths.exc%in%c(0),1,0)
dat$hs1.exc<-ifelse(dat$toths.exc%in%c(1),1,0)
dat$hs2.exc<-ifelse(dat$toths.exc%in%c(2),1,0)
dat$hs35.exc<-ifelse(dat$toths.exc%in%c(3,4,5),1,0)
##Group-level for number w/ college diploma or more
dat<-dat %>% group_by(jurynum) %>% mutate(totcd.inc=sum(ind_eduCD),
                                          totcd.exc=(sum(ind_eduCD)-(ind_eduCD)))
dat$college01.inc<-ifelse(dat$totcd.inc%in%c(0,1),1,0)
dat$college2.inc<-ifelse(dat$totcd.inc%in%c(2),1,0)
dat$college3.inc<-ifelse(dat$totcd.inc%in%c(3),1,0)
dat$college45.inc<-ifelse(dat$totcd.inc%in%c(4,5),1,0)
dat$college0.exc<-ifelse(dat$totcd.exc%in%c(0),1,0)
dat$college1.exc<-ifelse(dat$totcd.exc%in%c(1),1,0)
dat$college2.exc<-ifelse(dat$totcd.exc%in%c(2),1,0)
dat$college3.exc<-ifelse(dat$totcd.exc%in%c(3),1,0)
dat$college45.exc<-ifelse(dat$totcd.exc%in%c(4,5),1,0)
##Group-level for number w/ low income
dat<-dat %>% group_by(jurynum) %>% mutate(totpoor.inc=sum(ind_incLow),
                                          totpoor.exc=(sum(ind_incLow)-(ind_incLow)))
dat$lowinc0.inc<-ifelse(dat$totpoor.inc%in%c(0),1,0)
dat$lowinc1.inc<-ifelse(dat$totpoor.inc%in%c(1),1,0)
dat$lowinc2.inc<-ifelse(dat$totpoor.inc%in%c(2),1,0)
dat$lowinc3.inc<-ifelse(dat$totpoor.inc%in%c(3),1,0)
dat$lowinc45.inc<-ifelse(dat$totpoor.inc%in%c(4,5),1,0)
dat$lowinc0.exc<-ifelse(dat$totpoor.exc%in%c(0),1,0)
dat$lowinc1.exc<-ifelse(dat$totpoor.exc%in%c(1),1,0)
dat$lowinc2.exc<-ifelse(dat$totpoor.exc%in%c(2),1,0)
dat$lowinc35.exc<-ifelse(dat$totpoor.exc%in%c(3,4,5),1,0)
##Group-level for number w/ high income
dat<-dat %>% group_by(jurynum) %>% mutate(totrich.inc=sum(ind_incHigh),
                                          totrich.exc=(sum(ind_incHigh)-(ind_incHigh)))
dat$highinc0.inc<-ifelse(dat$totrich.inc%in%c(0),1,0)
dat$highinc1.inc<-ifelse(dat$totrich.inc%in%c(1),1,0)
dat$highinc2.inc<-ifelse(dat$totrich.inc%in%c(2),1,0)
dat$highinc3.inc<-ifelse(dat$totrich.inc%in%c(3),1,0)
dat$highinc45.inc<-ifelse(dat$totrich.inc%in%c(4,5),1,0)
dat$highinc0.exc<-ifelse(dat$totrich.exc%in%c(0),1,0)
dat$highinc1.exc<-ifelse(dat$totrich.exc%in%c(1),1,0)
dat$highinc2.exc<-ifelse(dat$totrich.exc%in%c(2),1,0)
dat$highinc35.exc<-ifelse(dat$totrich.exc%in%c(3,4,5),1,0)

# DELETE IF NOT USED LATER
##Dummies for juries with roughly 50/50 opposite socio-demographic features
dat$whitenonwhite24<-ifelse(dat$totnwhite.inc%in%c(2,3,4),1,0)
dat$youngold24<-ifelse(dat$totyoung.inc%in%c(2,3,4),1,ifelse(dat$totold.inc%in%c(2,3,4),1,0))
dat$hscd24<-ifelse(dat$toths.inc%in%c(2,3,4),1,ifelse(dat$totcd.inc%in%c(2,3,4),1,0))
dat$poorrich24<-ifelse(dat$totpoor.inc%in%c(2,3,4),1,ifelse(dat$totrich.inc%in%c(2,3,4),1,0))


####


# Create jury-level ordinal dollar variable:
dat <- dat %>% mutate(jury.eight = case_when(jrydoll==0 ~ 0,
                                             jrydoll>=1&jrydoll<=50000~1,
                                             jrydoll>=50001&jrydoll<=100000~2,
                                             jrydoll>=100001&jrydoll<=200000~3,
                                             jrydoll>=200001&jrydoll<500000~4,
                                             jrydoll>=500000&jrydoll<=750000~5,
                                             jrydoll>=750001&jrydoll<=1000000~6,
                                             jrydoll>=1000001&jrydoll<=3000000~7,
                                             jrydoll > 3000000 ~8,
                                             TRUE ~ NA_real_))


##Creating new indicators of group composition
dat$white5.exc <- dat$nonwhite0.exc
dat$white4.exc <- dat$nonwhite1.exc
dat$white13.exc <- dat$nonwhite24.exc

dat$male35.exc <- dat$female02.exc
dat$male2.exc <- dat$female3.exc
dat$male01.exc <- dat$female45.exc


#Dollar deliberation first then ratings: a
#Ratings deliberation first then dollar: b

#Divide the data into pre and post deliberation rounds:
pre_del<-subset.data.frame(dat,ordernum=="b")
post_del<-subset.data.frame(dat,ordernum=="a")

##Combine ratings and dollars into one variable, by round
##Individual ratings
pre_del$scale_all_rd1<-pre_del$scaleis
pre_del$scale_all_rd2<-pre_del$or_idoll

pre_del$med_scale_rd1<-pre_del$med_scale
pre_del$med_scale_rd2<- pre_del$med_or_idoll

pre_del$sd_scale_rd1<-pre_del$sd_scale
pre_del$sd_scale_rd2<-pre_del$sd_or_idoll

post_del$scale_all_rd1<-post_del$or_idoll
post_del$scale_all_rd2<-post_del$scaleis

post_del$med_scale_rd1<-post_del$med_or_idoll
post_del$med_scale_rd2<-post_del$med_scale

post_del$sd_scale_rd1<-post_del$sd_or_idoll 
post_del$sd_scale_rd2<-post_del$sd_scale

##Jury Verdicts
pre_del$verdict_rd1 <- pre_del$scalejr
pre_del$verdict_rd2 <- pre_del$jury.eight/8

post_del$verdict_rd1 <- post_del$jury.eight/8
post_del$verdict_rd2 <- post_del$scalejr 

dat_byround<-rbind(pre_del, post_del)




#CREATE Group-level data sets:
group.data<-dplyr::select(dat_byround,c(female12.inc,female3.inc,female4.inc,female56.inc,
                                        nonwhite0.inc,nonwhite1.inc,nonwhite24.inc,
                                        hisp0.inc, hisp1.inc, hisp23.inc,
                                        young0.inc,young1.inc,young2.inc,young34.inc,old35.inc,old2.inc,old1.inc,old0.inc,
                                        hs0.inc,hs1.inc,hs2.inc,hs35.inc,college3.inc,college2.inc,college01.inc, college45.inc,
                                        lowinc0.inc,lowinc1.inc,lowinc2.inc,lowinc3.inc,lowinc45.inc,
                                        highinc45.inc,highinc3.inc,highinc2.inc,highinc1.inc,highinc0.inc,ordernum,
                                        scalejr,ljrydoll,jrydoll,med_scale,med_or_idoll,sd_scale,sd_or_idoll, med_lidoll, sd_lidoll, jurynum,scenario, 
                                        diff_race_med_or_idoll,diff_race_med_scale, diff_gender_med_or_idoll,
                                        verdict_rd1, verdict_rd2, med_scale_rd1, med_scale_rd2, sd_scale_rd1, sd_scale_rd2,
                                        diff_old_med_or_idoll, diff_old_med_scale, diff_cd_med_or_idoll, diff_cd_med_scale, diff_rich_med_or_idoll, diff_rich_med_scale,
                                        diff_gender_med_scale, totfem.inc, totnwhite.inc, totold.inc, totcd.inc, totrich.inc,totpoor.inc, toths.inc, totyoung.inc, jury.eight))
group.data<-unique(group.data)
group.data$or_jrydoll<- group.data$jury.eight/8

#Separate dollar-first jury data from punishment scale-first jury data.
group.pre_del<-subset.data.frame(group.data,ordernum=="b")
group.post_del<-subset.data.frame(group.data,ordernum=="a")

#check scenario mean punishment scales
post_del<-post_del %>% group_by(scenario) %>% mutate(meanscaleis=mean(scaleis, na.rm=TRUE))
scenariomeans<- unique(post_del[,c("meanscaleis", "scenario")]) 
as.data.frame(scenariomeans[order(scenariomeans$meanscaleis),])

#Make Workplace benzene  7 the base category in factor variable "scenario".
dat_byround <- dat_byround %>%
  mutate(scenario = relevel(scenario, ref = "Workplace benzene  7"))

post_del <- post_del %>%
  mutate(scenario = relevel(scenario, ref = "Workplace benzene  7"))
pre_del <- pre_del %>%
  mutate(scenario = relevel(scenario, ref = "Workplace benzene  7"))


group.post_del <- group.post_del %>%
  mutate(scenario = relevel(scenario, ref = "Workplace benzene  7"))
group.pre_del <- group.pre_del %>%
  mutate(scenario = relevel(scenario, ref = "Workplace benzene  7"))

group.data <- group.data %>%
  mutate(scenario = relevel(scenario, ref = "Workplace benzene  7"))



#Create hung jury dependent variables: nothung=1, hung=0
group.post_del$or_jrydoll_nothung<-1
group.post_del$or_jrydoll_nothung[is.na(group.post_del$jury.eight)==TRUE]<-0

group.post_del$scalejr_nothung<-1
group.post_del$scalejr_nothung[is.na(group.post_del$scalejr)==TRUE]<-0

group.pre_del$or_jrydoll_nothung<-1
group.pre_del$or_jrydoll_nothung[is.na(group.pre_del$jury.eight)==TRUE]<-0

group.pre_del$scalejr_nothung<-1
group.pre_del$scalejr_nothung[is.na(group.pre_del$scalejr)==TRUE]<-0

group.pre_del$round1_nothung<-1
group.pre_del$round1_nothung[is.na(group.pre_del$scalejr)==TRUE]<-0

group.pre_del$round2_nothung<-1
group.pre_del$round2_nothung[is.na(group.pre_del$jury.eight)==TRUE]<-0

group.post_del$round1_nothung<-1
group.post_del$round1_nothung[is.na(group.post_del$jury.eight)==TRUE]<-0

group.post_del$round2_nothung<-1
group.post_del$round2_nothung[is.na(group.post_del$scalejr)==TRUE]<-0

# race difference medians

group.pre_del$diff_race_med_round1 <- group.pre_del$diff_race_med_scale
group.pre_del$diff_race_med_round2 <- group.pre_del$diff_race_med_or_idoll

group.post_del$diff_race_med_round1 <- group.post_del$diff_race_med_or_idoll
group.post_del$diff_race_med_round2 <- group.post_del$diff_race_med_scale



group.data$or_jrydoll_nothung<-1
group.data$or_jrydoll_nothung[is.na(group.data$jury.eight)==TRUE]<-0

group.data$scalejr_nothung<-1
group.data$scalejr_nothung[is.na(group.data$scalejr)==TRUE]<-0



percentiles<- c(1, 50000, 100000, 200000, 500000-1, 750000, 1000000, 3000000)
group.percentiles<- c(1, 50000, 100000, 200000, 500000-1, 750000, 1000000, 3000000)

group.dat_byround<-rbind(group.pre_del, group.post_del)

toofew_byround <- dat_byround %>% group_by(jurynum) %>% summarize(n()) %>% filter(`n()`!=6) %>% select(jurynum)
rm(toofew_byround)
rm(percentiles, group.percentiles, scenariomeans)


###########################
### ANALYSIS: APPENDIX TEXT ###
###########################



## Appendix Figure A1: Simulated Jury Randomization ####

#select juror demographic variables
pj.food <- dat %>% select(ind_female, ind_nonwhite, ind_ageYoung, ind_ageMiddle, ind_ageOld,
                          ind_incLow, ind_incMiddle, ind_incHigh, ind_incNA,
                          ind_eduHS, ind_eduSC, ind_eduCD)

#create indices to use for randomly selecting juries
jurynum.vec <- rep(c(1:490),each=6)[1:nrow(pj.food)]
#repeat 500 times
jurynum.vec <- rep(jurynum.vec, 500)


set.seed(27514)
# create 500 new datasets by randomly sampling rows from original
pj.data <- pj.food %>% ungroup %>%
  infer::rep_sample_n(size = nrow(pj.food), replace = FALSE, reps = 500) 
# assign new dataset jurors to juries
pj.data$jurynum.new <- jurynum.vec

### Creating the jury-level variables:
pj.data <- pj.data %>% ungroup() %>% group_by(replicate,jurynum.new) %>% mutate(
  totfem.inc = sum(ind_female),
  totnwhite.inc=sum(ind_nonwhite),
  totyoung.inc=sum(ind_ageYoung),
  totold.inc=sum(ind_ageOld),
  toths.inc=sum(ind_eduHS),
  totcd.inc=sum(ind_eduCD),
  totpoor.inc=sum(ind_incLow),
  totrich.inc=sum(ind_incHigh)
)

### summarize characteristics of simulated juries
pj.juries <- pj.data %>% ungroup() %>% select(
  totfem.inc, totnwhite.inc, totyoung.inc, totold.inc, toths.inc, totcd.inc, totpoor.inc, totrich.inc,
  replicate, jurynum.new) %>% unique()

# and a similar dataset for the real juries:
real.juries <- dat %>% select(totfem.inc, totnwhite.inc, totyoung.inc, totold.inc, toths.inc,totcd.inc,
                              totpoor.inc,totrich.inc,jurynum) %>% unique()

#for each characteristic, plot distributions for real and simulated juries
plot1<-ggplot()+
  geom_density(data=pj.juries, aes(totfem.inc,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(totfem.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number of Women in Jury",y="Density",
       title="Gender Distribution")

plot2<-ggplot()+
  geom_density(data=pj.juries, aes(totnwhite.inc ,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(totnwhite.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number POC in Jury",y="Density",
       title="Race Distribution")

plot3<-ggplot()+
  geom_density(data=pj.juries, aes(totyoung.inc ,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(totyoung.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number Young in Jury",y="Density",
       title="Age Distribution (Young)")

plot4<-ggplot()+
  geom_density(data=pj.juries, aes(totold.inc ,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(totold.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number Older in Jury",y="Density",
       title="Age Distribution (Older)")

plot5<-ggplot()+
  geom_density(data=pj.juries, aes(toths.inc ,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(toths.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number HS or Less in Jury",y="Density",
       title="Education Distribution (HS or Less)")

plot6<-ggplot()+
  geom_density(data=pj.juries, aes(totcd.inc ,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(totcd.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number College Degree in Jury",y="Density",
       title="Education Distribution (College Degree)")

plot7<-ggplot()+
  geom_density(data=pj.juries, aes(totpoor.inc ,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(totpoor.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number Low-Income in Jury",y="Count",
       title="Income Distribution (Low-Income)")

plot8<-ggplot()+
  geom_density(data=pj.juries, aes(totrich.inc ,group = replicate), adjust=3, color="gray80")+
  geom_density(data=real.juries, aes(totrich.inc), adjust=3, color="black")+
  theme_bw()+
  labs(x="Number High-Income in Jury",y="Count",
       title="Income Distribution (High-Income)")

pdf("Appendix Figures/fakejuries.pdf", width=12, height=20)
grid.arrange(plot1,plot2,plot3,plot4,plot5,plot6,plot7,plot8, ncol=2,
             top = textGrob("Jury-Level Distributions",gp=gpar(fontsize=20,font=3)),
             bottom = textGrob("",
                               gp=gpar(fontsize=10,font=1)))
dev.off()


rm(plot1,plot2,plot3,plot4,plot5,plot6,plot7,plot8)
#
## Appendix Figures A2 and A3: Jury Composition Histograms with category coding indicated -----------------------------

#remove data outside two key experimental groups
graph.data <- dat %>% filter(order!="C")

## A2: JUROR LEVEL -------------

#plot each demographic feature by deliberation order
juror1 <- ggplot(graph.data, aes(as.factor(ind_female),fill=factor(order)))+
  geom_bar(width=.5,position=position_dodge(width = .5))+
  theme_bw()+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5), size = 3)+
  labs(x="Juror Gender",y="Count",
       title="Gender Distribution")+
  geom_vline(xintercept=c(1.5), linetype="dashed")+
  scale_x_discrete(labels=c("Male","Female"))

juror2 <- ggplot(graph.data, aes(as.factor(white),fill=factor(order)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5), size = 3)+
  labs(x="Juror Race",y="Count",
       title="Racial Distribution")+
  geom_vline(xintercept=c(1.5), linetype="dashed")+
  scale_x_discrete(labels=c("POC","White"))

juror3 <- ggplot(graph.data, aes(as.factor(age),fill=factor(order)))+
  geom_bar(width=.5,position=position_dodge(width = .5))+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5), size = 3)+
  labs(x="Juror Age",y="Count",
       title="Age Distribution")+
  geom_vline(xintercept=c(1.5,4.5), linetype="dashed")+
  scale_x_discrete(labels=c("18-29","30-39","40-49","50-59","60-69","70+"))

juror4 <- ggplot(graph.data, aes(as.factor(educ),fill=factor(order)))+
  geom_bar(width=.5,position=position_dodge(width = .5))+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5), size = 3)+
  labs(x="Juror Education",y="Count",
       title="Education Distribution")+
  geom_vline(xintercept=c(2.5,3.5), linetype="dashed")+
  scale_x_discrete(labels=c("Some HS","HS Dip","Some College","College Dip","Some Post-Grad","Grad Degree"))

juror5 <- ggplot(graph.data, aes(as.factor(income),fill=factor(order)))+
  geom_bar(width=.5,position=position_dodge(width = .5))+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5), size = 3)+
  labs(x="Juror Income",y="Count",
       title="Income Distribution")+
  geom_vline(xintercept=c(3.5,4.5,6.5), linetype="dashed")+
  scale_x_discrete(labels=c("<$10k","$10k-$20k","$20k-$30k","$30k-$50k","$50k-$75k","$75k<","Missing"))

juror6 <- ggplot(graph.data, aes(or_idoll,fill=factor(order)))+
  geom_density(alpha=.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Ordinal Dollar", y="Density",
       title = "Individual Dollar Punishment",
       caption = "Note: 36 NA's.")

juror7 <- ggplot(graph.data, aes(scaleis,fill=factor(order)))+
  geom_density(alpha=.5)+
  theme_bw()+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Punishment Rating", y="Density",
       title = "Individual Punishment Rating",
       caption = "Note: 3 NA's")

#combine plots 
g<-arrangeGrob(juror1,juror2,juror3,juror4,juror5,juror6,juror7, ncol=2,
               top = textGrob("Juror-Level Distributions",gp=gpar(fontsize=20,font=3)),
               bottom = textGrob("Distribution of attributes across all jurors, split by deliberation order. Vertical lines used to indicate cutoffs for grouping.", gp=gpar(fontsize=10,font=1)))

#save out plots
ggsave("Appendix Figures/jurordist.pdf",g,width = 12,height = 14)

rm(juror1,juror2,juror3,juror4,juror5,juror6,juror7)

## A3: JURY LEVEL------

#delete juries outside main conditions
graph.group <- group.data %>% filter(ordernum!="c") 

jury1 <- ggplot(graph.group, aes(as.factor(totfem.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of Women in Jury",y="Count",
       title="Gender Distribution")+
  geom_vline(xintercept=c(2.5,3.5,4.5), linetype="dashed")

jury2 <- ggplot(graph.group, aes(as.factor(totnwhite.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of POC in Jury",y="Count",
       title="Race Distribution")+
  geom_vline(xintercept=c(1.5,2.5), linetype="dashed")

jury3 <- ggplot(graph.group, aes(as.factor(totyoung.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of Young in Jury",y="Count",
       title="Age Distribution (Young)")+
  geom_vline(xintercept=c(1.5,2.5,3.5), linetype="dashed")

jury4 <- ggplot(graph.group, aes(as.factor(totold.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of Older in Jury",y="Count",
       title="Age Distribution (Older)")+
  geom_vline(xintercept=c(1.5,2.5,3.5), linetype="dashed")

jury5 <- ggplot(graph.group, aes(as.factor(toths.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of HS or Less in Jury",y="Count",
       title="Education Distribution (HS or Less)")+
  geom_vline(xintercept=c(1.5,2.5,3.5), linetype="dashed")

jury6 <- ggplot(graph.group, aes(as.factor(totcd.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of College Degree in Jury",y="Count",
       title="Education Distribution (College Degree)")+
  geom_vline(xintercept=c(1.5,2.5,3.5,4.5), linetype="dashed")

jury7 <- ggplot(graph.group, aes(as.factor(totpoor.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of Low-Income in Jury",y="Count",
       title="Income Distribution (Low-Income)")+
  geom_vline(xintercept=c(1.5,2.5,3.5,4.5), linetype="dashed")

jury8 <- ggplot(graph.group, aes(as.factor(totrich.inc),fill=factor(ordernum)))+
  geom_bar(width=.5,position=position_dodge(width = .5), size = 3)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  stat_count(aes(label=..count..), vjust=0,
             geom="text", position=position_dodge(width = .5))+
  labs(x="Number of High-Income in Jury",y="Count",
       title="Income Distribution (High-Income)")+
  geom_vline(xintercept=c(1.5,2.5,3.5,4.5), linetype="dashed")

jury9 <- ggplot(graph.group, aes(or_jrydoll,fill=factor(ordernum)))+
  geom_density(alpha=.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Ordinal Dollar ", y="Density",
       title = "Group Dollar Punishment")

jury10 <-ggplot(graph.group, aes(scalejr,fill=factor(ordernum)))+
  geom_density(alpha=.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Punishment Rating", y="Density",
       title = "Group Punishment Rating")

#combine panels 
g<-arrangeGrob(jury1,jury2,jury3,jury4,jury5,jury6,jury7,jury8,jury9,jury10, ncol=2,
               top = textGrob("Jury-Level Distributions",gp=gpar(fontsize=20,font=3)),
               bottom = textGrob("Distribution of attributes across all juries, split by deliberation order. Vertical lines used to indicate cutoffs for grouping.", gp=gpar(fontsize=10,font=1)))
#save plot
ggsave("Appendix Figures/jurydist.pdf",g,width = 12,height = 16)

rm(jury1,jury2,jury3,jury4,jury5,jury6,jury7,jury8,jury9,jury10)



## Appendix Tables A2 and A3: Summary Statistics ####

# fixing stargazer bug in v5.2.3 (from https://gist.github.com/alexeyknorre/b0780836f4cec04d41a863a683f91b53)
# Unload stargazer if loaded
detach("package:stargazer",unload=T)
# Delete it
remove.packages("stargazer")
# Download the source
download.file("https://cran.r-project.org/src/contrib/stargazer_5.2.3.tar.gz", destfile = "stargazer_5.2.3.tar.gz")
# Unpack
untar("stargazer_5.2.3.tar.gz")
# Read the sourcefile with .inside.bracket fun
stargazer_src <- readLines("stargazer/R/stargazer-internal.R")
# Move the length check 5 lines up so it precedes is.na(.)
stargazer_src[1990] <- stargazer_src[1995]
stargazer_src[1995] <- ""
# Save back
writeLines(stargazer_src, con="stargazer/R/stargazer-internal.R")
# Compile and install the patched package
install.packages("stargazer", repos = NULL, type="source")
library(stargazer)

stargazer(as.data.frame(dat_byround[c("ind_nonwhite", "ind_female", "ind_ageYoung", "ind_ageMiddle", "ind_ageOld", "ind_eduHS", "ind_eduSC", "ind_eduCD", "ind_incLow", "ind_incMiddle", "ind_incHigh", "ind_incNA", "scale_all_rd1")]), 
          type = "latex",
          digits = 2,
          summary.stat = c("n", "mean", "sd", "min", "max"),
          title = "Key Variable Summary Statistics (Individual-Level, All Juries)",
          covariate.labels = c("Nonwhite", "Female", "Younger (18-29)", "Middle Age (30-59)", "Older (60+)", "High School", "Some College", "College Graduate", "Low Income (< $30K)", "Middle Income ($30-50K)", "High Income (> $50K)", "Missing Income", "Pre-Deliberation Punishment Preference"),
          out="Appendix Figures/summary_ind_all.tex")

stargazer(as.data.frame(group.dat_byround[c("totfem.inc", "totnwhite.inc", "totyoung.inc", "totold.inc", "toths.inc", "totcd.inc", "totpoor.inc", "totrich.inc", "verdict_rd1", "verdict_rd2", "round1_nothung", "round2_nothung", "med_scale_rd1", "med_scale_rd2", "sd_scale_rd1", "sd_scale_rd2" )]),
          type = "latex",
          digits = 2,
          summary.stat = c("n", "mean", "sd", "min", "max"),
          title = "Key Variable Summary Statistics (Jury-Level, All Juries)",
          covariate.labels = c("Number of Women", "Number of POC", "Number of Younger", "Number of Older", "Number of High School Only Grads", "Number of College Grads", "Number of Low Income Jurors", "Number of High Income Jurors", "Jury Verdict (Round 1)", "Jury Verdict (Round 2)", "Reached Verdict (Round 1)", "Reached Verdict (Round 2)", "Median Pre-Deliberation Prefs (Round 1)", "Median Pre-Deliberation Prefs (Round 2)", "SD Pre-Delib Prefs (Round 1)", "SD Pre-Delib Prefs (Round 2)"),
          out = "Appendix Figures/summary_jury_all.tex")

## Appendix Figure A4: Correlation plots between demographic indicators #####
# use corrplot package to plot correlations across demographic indicators 
  # use individual-level dataset
  # use variable codings that exclude the focal juror

pdf(file="Appendix Figures/group_corrplot_all.pdf", width=10, height=10)
cor(dat_byround[,c("white5.exc", "white4.exc", "white13.exc", "male35.exc", "male2.exc", "male01.exc", "old35.exc", "old2.exc", "old1.exc", "old0.exc", "college45.exc", "college3.exc", "college2.exc", "college1.exc","college0.exc" , "highinc35.exc", "highinc2.exc", "highinc1.exc", "highinc0.exc", "med_scale_rd1", "sd_scale_rd1")])
corrplot(cor(dat_byround[,c("white5.exc", "white4.exc", "white13.exc", "male35.exc", "male2.exc", "male01.exc", "old35.exc", "old2.exc", "old1.exc", "old0.exc", "college45.exc", "college3.exc", "college2.exc", "college1.exc","college0.exc" , "highinc35.exc", "highinc2.exc", "highinc1.exc", "highinc0.exc", "med_scale_rd1", "sd_scale_rd1")]),addCoef.col = "black", number.cex=0.75, method="ellipse", type="lower", tl.srt = 45)
dev.off()

## Appendix Figure A5: Dollar Punishment Distributions + ordinal scale markers ####
# plot distributions of dollar preferences, showing category divisions for scale version

#remove observations outside 2 key experimental groups
graph.data <- dat %>% filter(order!="C")
graph.group <- group.data %>% filter(ordernum!="c") 
#note category divisions 
percentiles<- c(1, 50000, 100000, 200000, 500000-1, 750000, 1000000, 3000000)
group.percentiles<- c(1, 50000, 100000, 200000, 500000-1, 750000, 1000000, 3000000)

#individual dollar preferences, raw dollar amounts
dollardist1<-ggplot(graph.data, aes(idoll,fill=factor(order)))+
  geom_density(alpha=.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Dollar Punishment", y="Density",
       title = "Individual Dollar Punishment",
       caption = "Note: 36 NA's.")+
  geom_vline(xintercept=percentiles, linetype="dashed")

#individual dollar preferences, logged dollar amounts
dollardist2<-ggplot(graph.data, aes(log(idoll),fill=factor(order)))+
  geom_density(alpha=.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Logged Dollar Punishment", y="Density",
       title = "Individual Dollar Punishment",
       caption = "Note: 36 NA's.")+
  geom_vline(xintercept=log(percentiles), linetype="dashed")

# verdict dollar amounts, raw dollars
dollardist3<-ggplot(graph.group, aes(jrydoll,fill=factor(ordernum)))+
  geom_density(alpha=.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Dollar Punishment", y="Density",
       title = "Group Dollar Punishment")+
  geom_vline(xintercept=group.percentiles, linetype="dashed")

# verdict dollar amounts, logged dollars
dollardist4<-ggplot(graph.group, aes(log(jrydoll),fill=factor(ordernum)))+
  geom_density(alpha=.5)+
  theme_bw()+
  theme(legend.position = "none")+
  scale_fill_grey(start = .4, end = .8,name="Delib. Order",
                  labels=c("Dollar-First","Rating-First"))+
  labs(x = "Logged Dollar Punishment", y="Density",
       title = "Group Dollar Punishment")+
  geom_vline(xintercept=log(group.percentiles), linetype="dashed")

#combine panels and save
ggarrange(dollardist1,  dollardist2, dollardist3,dollardist4 ,  
          labels = c("A", "B", "C", "D"),
          ncol = 2, nrow = 2)
ggsave("Appendix Figures/dollar_dists.pdf", width = 15, height = 15)

rm(dollardist1,  dollardist2, dollardist3,dollardist4)

## Appendix Table A4: Pre-deliberation prefs. regressed on demographics ####

# scale preferences
predlib_prefs_scale<-lm(scaleis~
                          ind_nonwhite+
                          ind_female+
                          ind_ageYoung+ind_ageOld+
                          ind_eduHS+ind_eduCD+
                          ind_incLow+ind_incHigh+
                          ind_incNA+
                          scenario,data=pre_del)

# raw dollar preferences
predlib_prefs_doll<-lm(or_idoll~
                         ind_nonwhite+
                         ind_female+
                         ind_ageYoung+ind_ageOld+
                         ind_eduHS+ind_eduCD+
                         ind_incLow+ind_incHigh+
                         ind_incNA+
                         scenario,data=post_del)

# logged dollar preferences
predlib_prefs_doll_log<-lm(lidoll~
                             ind_nonwhite+
                             ind_female+
                             ind_ageYoung+ind_ageOld+
                             ind_eduHS+ind_eduCD+
                             ind_incLow+ind_incHigh+
                             ind_incNA+
                             scenario,data=post_del)

# combine models and print
stargazer( predlib_prefs_scale,  predlib_prefs_doll, predlib_prefs_doll_log, 
           se=list(coef(summary(predlib_prefs_scale,cluster = c("jurynum")))[, 2],
                   coef(summary(predlib_prefs_doll,cluster = c("jurynum")))[, 2],
                   coef(summary(predlib_prefs_doll_log,cluster = c("jurynum")))[, 2]),
           type="latex", out="Appendix Figures/predelib_indiv_sep_logadd.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
           , omit= "scenario" 
           , align=TRUE
           ,covariate.labels = c("POC" ,"Female" ,  "Young" ,  "Older" , 
                                 "High School Grad" ,  "College Grad" , 
                                 "Low Income" ,  "High Income" , "Missing Income" )
           , omit.stat=c("f", "ser"), omit.table.layout = "n", dep.var.labels=c("Rating", "Dollars", "Log Dollars"), no.space = T, single.row = T,
           add.lines=list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes")))
rm(predlib_prefs_scale, predlib_prefs_doll, predlib_prefs_doll_log)

## Appendix Table A5: Pre-deliberation prefs. regressed on demos + composition ####

var_order <- c("ind_nonwhite", "ind_female", "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD", "ind_incLow", "ind_incHigh", "ind_incNA", "nonwhite1.exc", "nonwhite0.exc", "female3.exc", "female02.exc", "old1.exc", "old2.exc", "old35.exc", "college1.exc", "college2.exc", "college3.exc", "college45.exc", "highinc1.exc", "highinc2.exc", "highinc35.exc")


# scale preferences 
predlib_prefs_scale_group<-lm(scaleis~
                                ind_nonwhite+nonwhite1.exc+nonwhite0.exc+
                                ind_female+female3.exc+female02.exc+
                                ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                                ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                                ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                                ind_incNA+
                                scenario,data=pre_del)
# raw dollar preferences
predlib_prefs_doll_group<-lm(or_idoll~
                               ind_nonwhite+nonwhite1.exc+nonwhite0.exc+
                               ind_female+female3.exc+female02.exc+
                               ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                               ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                               ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                               ind_incNA+
                               scenario,data=post_del)

# logged dollar preferences
predlib_prefs_doll_log_group<-lm(lidoll~
                                   ind_nonwhite+nonwhite1.exc+nonwhite0.exc+
                                   ind_female+female3.exc+female02.exc+
                                   ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                                   ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                                   ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                                   ind_incNA+
                                   scenario,data=post_del)

#combine models and export 
stargazer( predlib_prefs_scale_group,  predlib_prefs_doll_group, predlib_prefs_doll_log_group,
           se=list(coef(summary(predlib_prefs_scale_group,cluster = c("jurynum")))[, 2],
                   coef(summary(predlib_prefs_doll_group,cluster = c("jurynum")))[, 2],
                   coef(summary(predlib_prefs_doll_log_group,cluster = c("jurynum")))[, 2]),
           type="latex", out="Appendix Figures/predelib_group_sep_logadd.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
           , omit= "scenario"
           , order=(var_order)
           , align=TRUE
           ,covariate.labels = c("POC" ,"Female" ,  "Young" ,  "Older" , 
                                 "High School Grad" ,  "College Grad" , 
                                 "Low Income" ,  "High Income" , "Missing Income" ,
                                 "4 Whites", "5 Whites", 
                                 "2 Men", "3+ Men", 
                                 "1 Older", "2 Older", "3+ Older",  
                                 "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",   
                                 "1 High Income", "2 High Income", "3+ High Income")
           , omit.stat=c("f", "ser"), omit.table.layout = "n", dep.var.labels=c("Rating", "Dollars", "Log Dollars"), no.space = T, single.row = T,
           add.lines=list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes")))

rm(predlib_prefs_scale_group, predlib_prefs_doll_group, predlib_prefs_doll_log_group)

## Appendix Table A6: Main Text T2 without pre-delib. pref control ####

# include racial composition variables only
postdelib_prefs_simp_nopredlib <- lm(scale_all_rd2~
                                       ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                                       scenario,data=dat_byround)

# All group-level variables
postdelib_prefs_all_nopredlib<-lm(scale_all_rd2~
                                    ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                                    ind_female+female02.exc+female3.exc+
                                    ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                                    ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                                    ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                                    ind_incNA+
                                    scenario,data=dat_byround)


##All group-level variables + pre-deliberation group median
postdelib_prefs_all_2_nopredlib<-lm(scale_all_rd2~
                                      ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                                      ind_female+female02.exc+female3.exc+
                                      ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                                      ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                                      ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                                      ind_incNA+
                                      med_scale_rd1+
                                      scenario,data=dat_byround)


##Create table
  #edited manually in latex to omit group comp. coefficients 
stargazer(postdelib_prefs_simp_nopredlib, postdelib_prefs_all_nopredlib, postdelib_prefs_all_2_nopredlib, 
          se=list(coef(summary(postdelib_prefs_simp_nopredlib,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_prefs_all_nopredlib,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_prefs_all_2_nopredlib,cluster = c("jurynum")))[, 2]),
          type="latex", out="Appendix Figures/postdelib_main_noprefs_combined.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "ind_nonwhite", "ind_female", 
                    "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD",
                    "ind_incLow", "ind_incHigh", "ind_incNA")
          , order=(var_order), align=TRUE
          ,covariate.labels = c("4 Whites", "5 Whites", 
                                "2 Men", "3+ Men", 
                                "1 Older", "2 Older", "3+ Older",  
                                "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",   
                                "1 High Income", "2 High Income", "3+ High Income",
                                "Jury Median Pre-Delib. Preference")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Post-Deliberation Preference (t2)"),
          add.lines = list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes"),
                           c("Indiv. Demographic Controls?", "Yes", "Yes", "Yes")),
          column.sep.width = "2pt", omit.table.layout = "n", no.space = T)

rm(postdelib_prefs_simp_nopredlib, postdelib_prefs_all_nopredlib, postdelib_prefs_all_2_nopredlib)

## 
## Appendix Table A7: Main Text T2 with indiv/group interactions ####

# for each demographic, interact the individual category (e.g. juror race) with the group comp variable (e.g. racial composition)

# race interaction
postdelib_interact_race_all<-lm(scale_all_rd2~
                                  ind_nonwhite*nonwhite1.exc+ind_nonwhite*nonwhite0.exc+
                                  ind_female+female3.exc+female02.exc+
                                  ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                                  ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                                  ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                                  ind_incNA+scale_all_rd1+scenario,data=dat_byround)
# gender interaction
postdelib_interact_gender_all<-lm(scale_all_rd2~
                                    ind_nonwhite+nonwhite1.exc+nonwhite0.exc+
                                    ind_female*female3.exc+ind_female*female02.exc+
                                    ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                                    ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                                    ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                                    ind_incNA+scale_all_rd1+scenario,data=dat_byround)

# age interaction
postdelib_interact_age_all<-lm(scale_all_rd2~
                                 ind_nonwhite+nonwhite1.exc+nonwhite0.exc+
                                 ind_female+female3.exc+female02.exc+
                                 ind_ageYoung+ind_ageOld*old1.exc+ind_ageOld*old2.exc+ind_ageOld*old35.exc+
                                 ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                                 ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                                 ind_incNA+scale_all_rd1+scenario,data=dat_byround)

# education interaction
postdelib_interact_educ_all<-lm(scale_all_rd2~
                                  ind_nonwhite+nonwhite1.exc+nonwhite0.exc+
                                  ind_female+female3.exc+female02.exc+
                                  ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                                  ind_eduHS+ind_eduCD*college1.exc+ind_eduCD*college2.exc+ind_eduCD*college3.exc+ind_eduCD*college45.exc+
                                  ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                                  ind_incNA+scale_all_rd1+scenario,data=dat_byround)

#income interaction
postdelib_interact_inc_all<-lm(scale_all_rd2~
                                 ind_nonwhite+nonwhite1.exc+nonwhite0.exc+
                                 ind_female+female3.exc+female02.exc+
                                 ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                                 ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                                 ind_incLow+ind_incHigh*highinc1.exc+ind_incHigh*highinc2.exc+ind_incHigh*highinc35.exc+
                                 ind_incNA+scale_all_rd1+scenario,data=dat_byround)


#combine models and print
  #edited manually in latex to omit group composition coefficients and orient horizontally
stargazer(postdelib_interact_race_all, postdelib_interact_gender_all, postdelib_interact_age_all, postdelib_interact_educ_all, postdelib_interact_inc_all, 
          se=list(coef(summary(postdelib_interact_race_all,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_interact_gender_all,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_interact_age_all,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_interact_educ_all,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_interact_inc_all,cluster = c("jurynum")))[, 2]),
          type="latex", out="Appendix Figures/postdelib_interaction_combined.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= "scenario"
          , align=TRUE
          ,covariate.labels = c("POC" ,  "4 White" ,  "5 White" ,  "Female" ,  "2 Men", "3+ Men",
                                "Young" ,  "Older" , "1 Older", "2 Older", "3+ Older", 
                                "High School Grad" ,  "College Grad" ,  "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",
                                "Low Income" ,  "High Income" ,   "1 High Income", "2 High Income", "3+ High Income",  "Missing Income" ,  
                                "Pre-Deliberation Juror Preferences" ,  
                                "POC * 4 White", "POC * 5 White",
                                "Female * 2 Men" ,  "Female * 3+ Men", 
                                "Older * 1 Older" ,  "Older * 2 Older" ,  "Older * 3+ Older",
                                "College Grad * 1 College Grad" ,  "College Grad * 2 College Grad" ,  "College Grad * 3 College Grad" ,  "College Grad * 4+ College Grad" ,
                                "High Income * 1 High Income" ,  "High Income * 2 High Income" ,  "High Income * 3+ High Income" )
          , omit.stat=c("f", "ser"), dep.var.labels=c("Post-Deliberation Preference (t2)"),
          column.labels = c("Race", "Gender", "Age", "Educ.", "Income"),
          add.lines = list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes", "Yes", "Yes")),
          single.row=T)

rm(postdelib_interact_race_all, postdelib_interact_gender_all, postdelib_interact_age_all, postdelib_interact_educ_all, postdelib_interact_inc_all)

## Appendix Table A8: Main Text T2 with lower-status composition indicators ####

# race composition variables only
modelA1_simp<-lm(scale_all_rd2~
                   ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                   scenario,data=dat_byround)

# add all group level demographics
modelA1<-lm(scale_all_rd2~
              ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
              ind_female+female02.exc+female3.exc+
              ind_ageYoung+ind_ageOld+young0.exc+young1.exc+
              ind_eduHS+ind_eduCD+hs0.exc+hs1.exc+
              ind_incLow+ind_incHigh+lowinc0.exc+lowinc1.exc+lowinc2.exc+
              ind_incNA+
              scenario,data=dat_byround)

# add pre-deliberation control
modelA1a<-lm(scale_all_rd2~
               ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
               ind_female+female02.exc+female3.exc+
               ind_ageYoung+ind_ageOld+young0.exc+young1.exc+
               ind_eduHS+ind_eduCD+hs0.exc+hs1.exc+
               ind_incLow+ind_incHigh+lowinc0.exc+lowinc1.exc+lowinc2.exc+
               ind_incNA+scale_all_rd1+
               scenario,data=dat_byround)

# add jury median
modelA1b<-lm(scale_all_rd2~
               ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
               ind_female+female02.exc+female3.exc+
               ind_ageYoung+ind_ageOld+young0.exc+young1.exc+
               ind_eduHS+ind_eduCD+hs0.exc+hs1.exc+
               ind_incLow+ind_incHigh+lowinc0.exc+lowinc1.exc+lowinc2.exc+
               ind_incNA+scale_all_rd1+med_scale_rd1+
               scenario,data=dat_byround)

# store modified coefficient order 
var_order_lowstatus<-c("ind_nonwhite", "nonwhite1.exc", "nonwhite0.exc", 
                       "ind_female", "female3.exc", "female02.exc", 
                       "ind_ageYoung", "ind_ageOld", "young1.exc", "young0.exc",
                       "ind_eduHS", "ind_eduCD", "hs1.exc", "hs0.exc",
                       "ind_incLow", "ind_incHigh", "lowinc2.exc", "lowinc1.exc", "lowinc0.exc",
                       "ind_incNA", "scale_all_rd1", "med_scale_rd1")

#gather and export models
  #manually edited in latex to omit group composition coefficients
stargazer(modelA1_simp, modelA1, modelA1a, modelA1b, 
          se=list(coef(summary(modelA1_simp,cluster = c("jurynum")))[, 2],
                  coef(summary(modelA1,cluster = c("jurynum")))[, 2],
                  coef(summary(modelA1a,cluster = c("jurynum")))[, 2],
                  coef(summary(modelA1b,cluster = c("jurynum")))[, 2]), 
          type="latex", out="Appendix Figures/postdelib_lowstatus_combined.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "ind_nonwhite", "ind_female", 
                    "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD",
                    "ind_incLow", "ind_incHigh", "ind_incNA")
          ,order=(var_order_lowstatus)
          ,covariate.labels = c("1 POC" , "0 POC" ,   
                                "3 Women" ,  "0-2 Women" ,   
                                "1 Young", "0 Young",
                                "1 HS Grad", "0 HS Grad",
                                "2 Low Inc", "1 Low Inc", "0 Low Inc", 
                                "Pre-Deliberation Juror Preferences (t0)" ,  "Jury Median Pre-Delib. Preference (t0)")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Post-Deliberation Preference (t2)"),
          add.lines = list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes", "Yes"),
                           c("Indiv. Demo Controls?", "Yes", "Yes", "Yes", "Yes")),
          column.sep.width = "2pt", omit.table.layout = "n", no.space = T)

rm(modelA1_simp, modelA1, modelA1a, modelA1b, var_order_lowstatus)

## Appendix Table A9: Main Text T2 with focal juror included in composition indicators ####

# racial composition only 
modelA1_inc_simp<-lm(scale_all_rd2~
                       ind_nonwhite+nonwhite0.inc+nonwhite1.inc+
                       scenario,data=dat_byround)
# add other group composition indicators
modelA1_inc<-lm(scale_all_rd2~
                  ind_nonwhite+nonwhite0.inc+nonwhite1.inc+
                  ind_female+female12.inc+female3.inc+female4.inc+
                  ind_ageYoung+ind_ageOld+old35.inc+old2.inc+old1.inc+
                  ind_eduHS+ind_eduCD+college45.inc+college3.inc+college2.inc+
                  ind_incLow+ind_incHigh+highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                  ind_incNA+
                  scenario,data=dat_byround)
# add pre-deliberation preference
modelA1a_inc<-lm(scale_all_rd2~
                   ind_nonwhite+nonwhite0.inc+nonwhite1.inc+
                   ind_female+female12.inc+female3.inc+female4.inc+
                   ind_ageYoung+ind_ageOld+old35.inc+old2.inc+old1.inc+
                   ind_eduHS+ind_eduCD+college45.inc+college3.inc+college2.inc+
                   ind_incLow+ind_incHigh+highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                   ind_incNA+scale_all_rd1+
                   scenario,data=dat_byround)
# add jury median pre-deliberation preference 
modelA1b_inc<-lm(scale_all_rd2~
                   ind_nonwhite+nonwhite0.inc+nonwhite1.inc+
                   ind_female+female12.inc+female3.inc+female4.inc+
                   ind_ageYoung+ind_ageOld+old35.inc+old2.inc+old1.inc+
                   ind_eduHS+ind_eduCD+college45.inc+college3.inc+college2.inc+
                   ind_incLow+ind_incHigh+highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                   ind_incNA+scale_all_rd1+med_or_idoll+
                   scenario,data=dat_byround)

# store modified variable order
var_order_inc <- c("ind_nonwhite", "ind_female", "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD", "ind_incLow", "ind_incHigh", "ind_incNA", 
                   "nonwhite1.inc", "nonwhite0.inc", 
                   "female4.inc", "female3.inc", "female12.inc", 
                   "old1.inc", "old2.inc", "old35.incc", 
                   "college2.inc", "college3.inc", "college45.inc", 
                   "highinc1.inc", "highinc2.inc", "highinc3.inc", "highinc45.inc")

# gather and export models
  # manually edited in latex to omit all but racial composition variables
stargazer(modelA1_inc_simp, modelA1_inc, modelA1a_inc, modelA1b_inc,
          se=list(coef(summary(modelA1_inc_simp,cluster = c("jurynum")))[, 2],
                  coef(summary(modelA1_inc,cluster = c("jurynum")))[, 2],
                  coef(summary(modelA1a_inc,cluster = c("jurynum")))[, 2],
                  coef(summary(modelA1b_inc,cluster = c("jurynum")))[, 2]), 
          type="latex", out="Appendix Figures/postdelib_incvars_combined.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "ind_nonwhite", "ind_female", 
                    "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD",
                    "ind_incLow", "ind_incHigh", "ind_incNA")
          ,order=(var_order_inc)
          ,covariate.labels = c("5 White", "6 White", 
                                "2 Men", "3 Men", "4+ Men",
                                "1 Older", "2 Older", "3+ Older",
                                "2 College Grad", "3 College Grad", "4+ College Grad", 
                                "1 High Income", "2 High Income", "3 High Income", "4+ High Income",
                                "Pre-Deliberation Juror Preferences (t0)" ,  "Jury Median Pre-Delib. Preference (t0)")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Punishment"),
          add.lines = list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes", "Yes"),
                           c("Indiv. Demo Controls?", "Yes", "Yes", "Yes", "Yes")),
          column.sep.width = "2pt", omit.table.layout = "n", no.space = T)

rm(modelA1_inc_simp, modelA1_inc, modelA1a_inc, modelA1b_inc,var_order_inc)

## Appendix Table A10: Main Text T2, dollars and ratings separated ####

# scale: racial composition only 
modelB1_simp <- lm(scaleis~
                     ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                     or_idoll+
                     scenario,data=post_del)


# scale: add group level demographics
modelB1a<-lm(scaleis~
               ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
               ind_female+female02.exc+female3.exc+
               ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
               ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
               ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
               ind_incNA+or_idoll+
               scenario,data=post_del)

# scale: add pre-deliberation median 
modelB1b<-lm(scaleis~
               ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
               ind_female+female02.exc+female3.exc+
               ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
               ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
               ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
               ind_incNA+or_idoll+med_or_idoll+
               scenario,data=post_del)


# dollars: racial composition only
modelC1_simp<-lm(or_idoll~
                   ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                   scaleis+
                   scenario,data=pre_del)



#dollars: add all group demographics and pre-delib control
modelC1a<-lm(or_idoll~
               ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
               ind_female+female02.exc+female3.exc+
               ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
               ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
               ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
               ind_incNA+scaleis+
               scenario,data=pre_del)

#dollar: add pre-delib median
modelC1b<-lm(or_idoll~
               ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
               ind_female+female02.exc+female3.exc+
               ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
               ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
               ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
               ind_incNA+scaleis+med_scale+
               scenario,data=pre_del)

var_order_sep=c(var_order, "or_idoll", "med_or_idoll", "scaleis", "med_scale")

# gather and export models
  # manually edited in latex to omit all but racial composition variables 
stargazer(modelB1_simp, modelB1a, modelB1b, modelC1_simp, modelC1a, modelC1b,
          se=list(coef(summary(modelB1_simp,cluster = c("jurynum")))[, 2],
                  coef(summary(modelB1a,cluster = c("jurynum")))[, 2],
                  coef(summary(modelB1b,cluster = c("jurynum")))[, 2],
                  coef(summary(modelC1_simp,cluster = c("jurynum")))[, 2],
                  coef(summary(modelC1a,cluster = c("jurynum")))[, 2],
                  coef(summary(modelC1b,cluster = c("jurynum")))[, 2]), 
          type="latex", out="Appendix Figures/postdelib_main_sep.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "ind_nonwhite", "ind_female", 
                    "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD",
                    "ind_incLow", "ind_incHigh", "ind_incNA")
          ,order=(var_order_sep)
          ,covariate.labels = c("4 White", "5 White",
                                "2 Men", "3+ Men",
                                "1 Older", "2 Older", "3+ Older",
                                "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",
                                "1 High Income", "2 High Income", "3+ High Income",
                                "Pre-Deliberation Dollar (t0)" ,  "Jury Median Pre-Delib. Dollar (t0)" ,
                                "Pre-Deliberation Rating (t0)", "Jury Median Pre-Delib. Rating (t0)" )
          , omit.stat=c("f", "ser"), dep.var.labels=c("Rating", "Dollars"),
          add.lines = list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Indiv. Demo Controls?", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")),
          column.sep.width = "2pt", omit.table.layout = "n", no.space = T)

rm(modelB1_simp, modelB1a, modelB1b, modelC1_simp, modelC1a, modelC1b, var_order_sep)

## Appendix Table A11: Main Text T2, logged dollar outcome ####
 
# race variables only 
modelC1_log_simp<-lm(lidoll~
                       ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                       scaleis+
                       scenario,data=pre_del)

# add all group demographics
modelC1a_log<-lm(lidoll~
                   ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                   ind_female+female02.exc+female3.exc+
                   ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                   ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                   ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                   ind_incNA+scaleis+
                   scenario,data=pre_del)

# add pre-delib median
modelC1b_log<-lm(lidoll~
                   ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                   ind_female+female02.exc+female3.exc+
                   ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                   ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                   ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                   ind_incNA+scaleis+med_scale+
                   scenario,data=pre_del)

# gather and export
  #edited manually in latex to omit all but racial composition variables 
stargazer(modelC1_log_simp, modelC1a_log, modelC1b_log,
          se=list(coef(summary(modelC1_log_simp,cluster = c("jurynum")))[, 2],
                  coef(summary(modelC1a_log,cluster = c("jurynum")))[, 2],
                  coef(summary(modelC1b_log,cluster = c("jurynum")))[, 2]), 
          type="latex", out="Appendix Figures/postdelib_logged_dollar_sep.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "ind_nonwhite", "ind_female", 
                    "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD",
                    "ind_incLow", "ind_incHigh", "ind_incNA")
          ,order=(var_order)
          ,covariate.labels = c("4 White", "5 White",
                                "2 Men", "3+ Men",
                                "1 Older", "2 Older", "3+ Older",
                                "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",
                                "1 High Income", "2 High Income", "3+ High Income",
                                "Pre-Deliberation Rating (t0)", "Jury Median Pre-Delib. Rating (t0)" )
          , omit.stat=c("f", "ser"), dep.var.labels=c("Logged Dollar (t2)"),
          add.lines = list(c("Legal Case Fixed Effects?",  "Yes", "Yes", "Yes"),
                           c("Indiv. Demo Controls?",  "Yes", "Yes", "Yes")),
          column.sep.width = "2pt", omit.table.layout = "n", no.space = T)

rm(modelC1_log_simp, modelC1a_log, modelC1b_log)

## Appendix Table A12: Within-group pre-deliberation opinion ####

# for each juror, 
for(i in 1:nrow(dat_byround)){
  jury <- dat_byround[dat_byround$jurynum==dat_byround$jurynum[i],]
  ipref <- dat_byround$scale_all_rd1[i]
  #extract all other jurors' preferences
  otherpref <- dat_byround$scale_all_rd1[dat_byround$jurynum==dat_byround$jurynum[i]&dat_byround$case_id!=dat_byround$case_id[i]]
  #count the number with prefs higher than the focal juror
  dat_byround$n_above[i] <- sum(otherpref>ipref, na.rm=T)
  #count the number of white jurors with higher preferences
  dat_byround$n_above_white[i] <- sum(jury$scale_all_rd1[jury$ind_nonwhite==0&jury$case_id!=dat_byround$case_id[i]]>ipref)
  #count the number of POC jurors with higher preferences
  dat_byround$n_above_nonwhite[i] <- sum(jury$scale_all_rd1[jury$ind_nonwhite==1&jury$case_id!=dat_byround$case_id[i]]>ipref)
}

# split the count variables into new columns
new_cols <- as.data.frame(dummy_cols(dat_byround$n_above_nonwhite))[,2:6]
colnames(new_cols) <- str_replace(colnames(new_cols), "\\.data_", "n_above_nonwhite_")
new_cols2 <- as.data.frame(dummy_cols(dat_byround$n_above_white))[,2:8]
colnames(new_cols2) <- str_replace(colnames(new_cols2), "\\.data_", "n_above_white_")
# recombine with original data
dat_byround <- cbind(dat_byround, new_cols, new_cols2)

# regress scale on demos, limiting to jurors with 0-2 jurors with more punitive prefs
postdelib_prefs_3 <-lm(scale_all_rd2~
                         ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                         ind_female+female02.exc+female3.exc+
                         ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                         ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                         ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                         ind_incNA+scale_all_rd1+med_scale_rd1+
                         scenario,
                       data=dat_byround %>% filter(n_above_white<=2&n_above_nonwhite<=2&n_above_nonwhite_NA==0&n_above_white_NA==0))

# add n jurors with more-punitive prefs
postdelib_prefs_4 <-lm(scale_all_rd2~
                         ind_nonwhite+nonwhite0.exc+nonwhite1.exc+
                         ind_female+female02.exc+female3.exc+
                         ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                         ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                         ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                         ind_incNA+scale_all_rd1+med_scale_rd1+
                         n_above_white_1+n_above_white_2+
                         n_above_nonwhite_1+n_above_nonwhite_2+
                         scenario,
                       data=dat_byround %>% filter(n_above_white<=2&n_above_nonwhite<=2&n_above_nonwhite_NA==0&n_above_white_NA==0))

#gather and export
  #manually edit in latex to keep only relevant coefs
stargazer(postdelib_prefs_3, postdelib_prefs_4, 
          se=list(coef(summary(postdelib_prefs_3,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_prefs_4,cluster = c("jurynum")))[, 2]),
          type="latex", out="Appendix Figures/postdelib_main_withmedinds.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "ind_nonwhite", "ind_female", 
                    "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD",
                    "ind_incLow", "ind_incHigh", "ind_incNA")
          , order=(var_order)
          ,covariate.labels = c("4 Whites", "5 Whites", 
                                "2 Men", "3+ Men", 
                                "1 Older", "2 Older", "3+ Older",  
                                "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",   
                                "1 High Income", "2 High Income", "3+ High Income",
                                "Individual Pre-delib. Preference" ,  "Jury Median Pre-delib. Preference",
                                "1 White Above Pref", "2 White Above Pref",
                                "1 POC Above Pref", "2 POC Above Pref")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Post-Deliberation Preference (t2)"),
          add.lines = list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes"),
                           c("Indiv. Demographic Controls?", "Yes", "Yes", "Yes"),
                           c("Other Composition Controls?", "Yes", "Yes", "Yes")),
          column.sep.width = "2pt", omit.table.layout = "n", no.space = T)


## Appendix Figure A6: Predicted Punitiveness by Jury Composition ####

# fill in hypothetical data to generate predictions
pp_all.data <- data.frame(
  "ind_nonwhite"=rep(0,13),
  "nonwhite0.exc"=rep(0,13),
  "nonwhite1.exc"=rep(0,13),
  "ind_female"=rep(median(dat_byround$ind_female),13),
  "female02.exc"=rep(0,13),
  "female3.exc"=rep(1,13),
  "ind_ageOld"= rep(median(dat_byround$ind_ageOld),13),
  "ind_ageYoung"= rep(median(dat_byround$ind_ageYoung),13),
  "old35.exc"= rep(0,13),
  "old1.exc"= rep(1,13), 
  "old2.exc"= rep(0,13),
  "ind_eduCD"= rep(median(dat_byround$ind_eduCD),13),
  "ind_eduHS"= rep(median(dat_byround$ind_eduHS),13),
  "college45.exc"= rep(0,13),
  "college1.exc"= rep(0,13),
  "college2.exc"= rep(1,13),
  "college3.exc"= rep(0,13),
  "ind_incHigh"= rep(median(dat_byround$ind_incHigh),13),
  "ind_incLow"= rep(median(dat_byround$ind_incLow),13),
  "highinc35.exc" =rep(0,13),
  "highinc1.exc" = rep(0,13),
  "highinc2.exc" = rep(1,13),
  "ind_incNA"= rep(0,13),
  "scale_all_rd1"=rep(median(dat_byround$scale_all_rd1[is.na(dat_byround$scale_all_rd1)==F]),13),
  "med_scale_rd1"=rep(.5,13),
  "scenario"= rep("Workplace benzene  7",13),
  "n_above_white_1"=c(0,1,1,1,0,0,0,0,1,0,0,1,0),
  "n_above_white_2"=c(0,0,0,0,1,1,1,0,0,1,0,0,1),
  "n_above_nonwhite_1"=c(0,0,1,0,0,1,0,1,1,1,0,0,0),
  "n_above_nonwhite_2"=c(0,0,0,1,0,0,1,0,0,0,1,1,1)
)

# predict post-delib pref
pp_all.values<-as.data.frame(predict(postdelib_prefs_4,pp_all.data,se.fit = T))

# extract confidence intervals
pp_all.values <- mutate(pp_all.values,
                        lwr=fit-1.39*se.fit,
                        upr=fit+1.39*se.fit)

# combine and label values to prepare for graphing
pp_all.graph.data<-data.frame("values"=pp_all.values$fit,
                              "ind_nonwhite"="White",
                              "group_nonwhite.exc"="2-4 POC",
                              "lwr"=pp_all.values$lwr,
                              "upr"=pp_all.values$upr,
                              "n_above_white_1"=c(0,1,1,1,0,0,0,0,1,0,0,1,0),
                              "n_above_white_2"=c(0,0,0,0,1,1,1,0,0,1,0,0,1),
                              "n_above_nonwhite_1"=c(0,0,1,0,0,1,0,1,1,1,0,0,0),
                              "n_above_nonwhite_2"=c(0,0,0,1,0,0,1,0,0,0,1,1,1))
pp_all.graph.data$group_nonwhite.exc<-as.factor(pp_all.graph.data$group_nonwhite.exc)
pp_all.graph.data$group_nonwhite.exc<-factor(pp_all.graph.data$group_nonwhite.exc, levels=c("2-4", "1", "0"))

# select distinct combinations
pp_all.graph.data <- pp_all.graph.data %>%
  mutate(Composition = case_when(n_above_white_1==0&n_above_white_2==0&n_above_nonwhite_1==0&n_above_nonwhite_2==0~"No others higher",
                                 n_above_white_1==1&n_above_white_2==0&n_above_nonwhite_1==0&n_above_nonwhite_2==0~"1 W, 0 POC higher",
                                 n_above_white_1==1&n_above_white_2==0&n_above_nonwhite_1==1&n_above_nonwhite_2==0~"1 W, 1 POC higher",
                                 n_above_white_1==1&n_above_white_2==0&n_above_nonwhite_1==0&n_above_nonwhite_2==1~"1 W, 2 POC higher",
                                 n_above_white_1==0&n_above_white_2==1&n_above_nonwhite_1==0&n_above_nonwhite_2==0~"2 W, 0 POC higher",
                                 n_above_white_1==0&n_above_white_2==1&n_above_nonwhite_1==1&n_above_nonwhite_2==0~"2 W, 1 POC higher",
                                 n_above_white_1==0&n_above_white_2==1&n_above_nonwhite_1==0&n_above_nonwhite_2==1~"2 W, 2 POC higher",
                                 n_above_white_1==0&n_above_white_2==0&n_above_nonwhite_1==1&n_above_nonwhite_2==0~"0 W, 1 POC higher",
                                 n_above_white_1==0&n_above_white_2==0&n_above_nonwhite_1==0&n_above_nonwhite_2==1~"0 W, 2 POC higher")) %>%
  distinct(values, .keep_all = TRUE)
#order levels of composition
pp_all.graph.data$Composition <- factor(pp_all.graph.data$Composition, levels=c(pp_all.graph.data$Composition[order(pp_all.graph.data$values)]), ordered=T)

# plot 
nonwhiteplot_all_facet<-ggplot(pp_all.graph.data, aes(x = Composition, y = values, fill = ind_nonwhite))+ 
  geom_bar(stat="identity", width=.5, position = "dodge")+
  geom_pointrange(aes(x=Composition,ymin=lwr,ymax=upr,group=ind_nonwhite),colour="black",
                  position=position_dodge(width = .5))+
  labs(x="Number of White Jurors in Group, Excluding Respondent",y="Predicted Post-Delib. Preference")+
  theme_bw()+
  scale_fill_grey(start = .4, end = .8,name="Juror Race")+
  theme(
    legend.position="none",
    rect = element_rect(fill = "transparent"),
    axis.text.x = element_text(colour = "black",face="bold"),
    axis.text.y = element_text(colour = "black",face="bold")
  )+
  ylim(0, 0.9)
nonwhiteplot_all_facet
ggsave(here("Appendix Figures/predicted_punishment_withracejurors.pdf"), width = 10, height = 5)

rm(pp_all.data, pp_all.graph.data, pp_all.values, postdelib_prefs_3, postdelib_prefs_4, new_cols, new_cols2)

## Appendix Table A13: first stage of selection model ####

#re-run selection models from main text
  #round 1:
heckit_withPM_controls_rd1_all<-heckit( round1_nothung~
                                          nonwhite0.inc+nonwhite1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+sd_scale_rd1,
                                        
                                        verdict_rd1~
                                          nonwhite0.inc+nonwhite1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+med_scale_rd1,data=group.dat_byround)
  #round 2:
heckit_withPM_controls_rd2_all<-heckit( round2_nothung~
                                          nonwhite0.inc+nonwhite1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+sd_scale_rd1,
                                        
                                        verdict_rd2~
                                          nonwhite0.inc+nonwhite1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+med_scale_rd1,data=group.dat_byround)
# run simplified model with only racial compostion controls, not others
  #round 1:
heckit_withPM_simp_rd1_all<-heckit( round1_nothung~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+sd_scale_rd1 ,
                                    
                                    verdict_rd1~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+med_scale_rd1,data=group.dat_byround)
  #round 2:
heckit_withPM_simp_rd2_all<-heckit( round2_nothung~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+sd_scale_rd1,
                                    
                                    verdict_rd2~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+med_scale_rd1,data=group.dat_byround)


var_order_grp <- c("nonwhite1.inc", "nonwhite0.inc", "female4.inc", "female3.inc", "female12.inc", "old1.inc", "old2.inc", "old35.inc", "college45.inc", "college3.inc", "college2.inc", "college45.inc", "highinc1.inc", "highinc2.inc", "highinc3.inc", "highinc45.inc")

#gather and export
stargazer(heckit_withPM_controls_rd1_all,heckit_withPM_controls_rd2_all, heckit_withPM_simp_rd1_all,  heckit_withPM_simp_rd2_all,
          type="latex", selection.equation=TRUE, out="Appendix Figures/heckit_st1_withPM_all.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "female4.inc", "female3.inc", "female12.inc", "old1.inc", "old2.inc", "old35.inc", "college45.inc", "college3.inc", "college2.inc", "college45.inc", "highinc1.inc", "highinc2.inc", "highinc3.inc", "highinc45.inc")
          , order=(var_order_grp)
          ,covariate.labels = c("5 White", "6 White", 
                                "SD Pre-Delib. Preference (t0)")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Reached Verdict (Round 1)", "Reached Verdict (Round 2)"),
          single.row=F, column.sep.width = "2pt", omit.table.layout = "n", no.space = T,
          add.lines=list(c("Addl Jury Demo Controls?", "Yes", "Yes", "No", "No"),
                         c("Incl. St. Dev. Prefs? ", "Yes", "Yes", "Yes", "Yes"))
)

rm(heckit_withPM_controls_rd1_all,heckit_withPM_controls_rd2_all, heckit_withPM_simp_rd1_all,  heckit_withPM_simp_rd2_all)


## Appendix Table A14: second stage of selection, alternative models ####

# models without non-race composition controls:
  #round 1:
heckit_withPM_simp_rd1_all<-heckit( round1_nothung~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+sd_scale_rd1 ,
                                    
                                    verdict_rd1~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+med_scale_rd1,data=group.dat_byround)
  #round 2:
heckit_withPM_simp_rd2_all<-heckit( round2_nothung~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+sd_scale_rd1,
                                    
                                    verdict_rd2~
                                      nonwhite1.inc+nonwhite0.inc+
                                      scenario+med_scale_rd1,data=group.dat_byround)

# models without pre-delbieration median control:
  #round 1:
heckit_noPM_controls_rd1_all<-heckit( round1_nothung~
                                        nonwhite0.inc+nonwhite1.inc+
                                        female12.inc+female3.inc+female4.inc+
                                        old35.inc+old2.inc+old1.inc+
                                        college45.inc+college3.inc+college2.inc+
                                        highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                        scenario+sd_scale_rd1,
                                      
                                      verdict_rd1~
                                        nonwhite0.inc+nonwhite1.inc+
                                        female12.inc+female3.inc+female4.inc+
                                        old35.inc+old2.inc+old1.inc+
                                        college45.inc+college3.inc+college2.inc+
                                        highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                        scenario,data=group.dat_byround)
  #round 2:
heckit_noPM_controls_rd2_all<-heckit( round2_nothung~
                                        nonwhite0.inc+nonwhite1.inc+
                                        female12.inc+female3.inc+female4.inc+
                                        old35.inc+old2.inc+old1.inc+
                                        college45.inc+college3.inc+college2.inc+
                                        highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                        scenario+sd_scale_rd1,
                                      
                                      verdict_rd2~
                                        nonwhite0.inc+nonwhite1.inc+
                                        female12.inc+female3.inc+female4.inc+
                                        old35.inc+old2.inc+old1.inc+
                                        college45.inc+college3.inc+college2.inc+
                                        highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                        scenario,data=group.dat_byround)

#gather and print:
stargazer(heckit_withPM_simp_rd1_all,heckit_withPM_simp_rd2_all,heckit_noPM_controls_rd1_all, heckit_noPM_controls_rd2_all,  
          type="latex", selection.equation=FALSE, out="Appendix Figures/heckit_st2_withPM_simp_all.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "female4.inc", "female3.inc", "female12.inc", "old1.inc", "old2.inc", "old35.inc", "college45.inc", "college3.inc", "college2.inc", "college45.inc", "highinc1.inc", "highinc2.inc", "highinc3.inc", "highinc45.inc")
          ,covariate.labels = c("5 White", "6 White",
                                "Median Pre-Delib. Preference (t0)")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Verdict (Round 1)", "Verdict (Round 2)")  ,
          single.row = F, column.sep.width = "2pt", omit.table.layout = "n", no.space = T,
          add.lines=list(c("Jury Composition Controls?", "No", "No", "Yes", "Yes"))
)

rm(heckit_withPM_simp_rd1_all,heckit_withPM_simp_rd2_all,heckit_noPM_controls_rd1_all, heckit_noPM_controls_rd2_all)

## Appendix Table A15: second stage, dollars and ratings separately ####


# round 1, dollars:
Scale_Punishment_heckit1_withPM<-heckit( scalejr_nothung~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+sd_scale ,
                                         
                                         scalejr~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+med_scale,data=group.pre_del)
# round 2, scale:
Scale_Punishment_heckit2_withPM<-heckit( scalejr_nothung~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+sd_or_idoll,
                                         
                                         scalejr~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+med_or_idoll,data=group.post_del)

# round 1: scale
Dollar_Punishment_heckit1_withPM<-heckit(or_jrydoll_nothung~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+sd_or_idoll,
                                         
                                         or_jrydoll~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+med_or_idoll,data=group.post_del)
# round 2: dollars
Dollar_Punishment_heckit2_withPM<-heckit(or_jrydoll_nothung~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+sd_scale,
                                         
                                         or_jrydoll~
                                           nonwhite0.inc+nonwhite1.inc+
                                           female12.inc+female3.inc+female4.inc+
                                           old35.inc+old2.inc+old1.inc+
                                           college45.inc+college3.inc+college2.inc+
                                           highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                           scenario+med_scale,data=group.pre_del)


# gather and print
stargazer(Scale_Punishment_heckit1_withPM,Scale_Punishment_heckit2_withPM, Dollar_Punishment_heckit1_withPM, Dollar_Punishment_heckit2_withPM, 
          type="latex", selection.equation=FALSE, out="Appendix Figures/heckit_st2_sep.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "female4.inc", "female3.inc", "female12.inc", "old1.inc", "old2.inc", "old35.inc", "college45.inc", "college3.inc", "college2.inc", "college45.inc", "highinc1.inc", "highinc2.inc", "highinc3.inc", "highinc45.inc")
          , order=(var_order_grp)
          ,covariate.labels = c("5 White", "6 White", 
                                "Median Pre-Delib. Rating (t0)", "Median Pre-Delib. Dollar (t0)")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Rating", "Dollars")  ,
          single.row = F, column.sep.width = "2pt", omit.table.layout = "n", no.space = T,
          add.lines=list(c("Jury Composition Controls?", "No", "No", "Yes", "Yes"))
          
)

rm(Dollar_Punishment_heckit1_withPM,
   Dollar_Punishment_heckit2_withPM,
   Scale_Punishment_heckit1_withPM,
   Scale_Punishment_heckit2_withPM)


## Appendix Table A16: main text table 1 with Hispanic indicator ####

# pre-delib preferences: individual demographics 
predelib_prefs_ind<-lm(scale_all_rd1~
                         ind_hisp+
                         ind_female+
                         ind_ageYoung+ind_ageOld+
                         ind_eduHS+ind_eduCD+
                         ind_incLow+ind_incHigh+
                         ind_incNA+
                         scenario,data=dat_byround)

# add group composition indicators
predelib_prefs_grp<-lm(scale_all_rd1~
                         ind_hisp+hisp1.exc+hisp0.exc+
                         ind_female+female3.exc+female02.exc+
                         ind_ageYoung+ind_ageOld+old1.exc+old2.exc+old35.exc+
                         ind_eduHS+ind_eduCD+college1.exc+college2.exc+college3.exc+college45.exc+
                         ind_incLow+ind_incHigh+highinc1.exc+highinc2.exc+highinc35.exc+
                         ind_incNA+
                         scenario,data=dat_byround)

var_order <- c("ind_hisp", "ind_female", "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD", "ind_incLow", "ind_incHigh", "ind_incNA", "hisp1.exc", "hisp0.exc", "female3.exc", "female02.exc", "old1.exc", "old2.exc", "old35.exc", "college1.exc", "college2.exc", "college3.exc", "college45.exc", "highinc1.exc", "highinc2.exc", "highinc35.exc")

stargazer( predelib_prefs_ind,  predelib_prefs_grp, 
           se=list(coef(summary(predelib_prefs_ind,cluster = c("jurynum")))[, 2],
                   coef(summary(predelib_prefs_grp,cluster = c("jurynum")))[, 2]),
           type="latex", out="Appendix Figures/predelib_hisp.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
           , omit= "scenario"
           , order=(var_order)
           ,covariate.labels = c("Hispanic" ,"Female" ,  "Young" ,  "Older" , 
                                 "High School Grad" ,  "College Grad" , 
                                 "Low Income" ,  "High Income" , "Missing Income" ,
                                 "1 Hispanic", "0 Hispanics", 
                                 "2 Men", "3+ Men", 
                                 "1 Older", "2 Older", "3+ Older",  
                                 "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",   
                                 "1 High Income", "2 High Income", "3+ High Income")
           , omit.stat=c("f", "ser"), omit.table.layout = "n", dep.var.labels=c("Pre-Deliberation Preferences (t_0)"), no.space = T, single.row = T,
           add.lines=list(c("Legal Case Fixed Effects?", "Yes", "Yes")))

rm(predelib_prefs_grp, predelib_prefs_ind)

## Appendix Table A17: main text table 2 with Hispanic indicator ####

# simple model- only race and pre-delib pref
postdelib_prefs_simp <- lm(scale_all_rd2~
                             ind_hisp+hisp0.exc+hisp1.exc+
                             scale_all_rd1+
                             scenario,data=dat_byround)

# add all group composition indicators 
postdelib_prefs_all<-lm(scale_all_rd2~
                          ind_hisp+hisp0.exc+hisp1.exc+
                          ind_female+female02.exc+female3.exc+
                          ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                          ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                          ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                          ind_incNA+scale_all_rd1+
                          scenario,data=dat_byround)

## All group-level variables + pre-deliberation control + pre-deliberation group median
postdelib_prefs_all_2<-lm(scale_all_rd2~
                            ind_hisp+hisp0.exc+hisp1.exc+
                            ind_female+female02.exc+female3.exc+
                            ind_ageYoung+ind_ageOld+old35.exc+old2.exc+old1.exc+
                            ind_eduHS+ind_eduCD+college45.exc+college3.exc+college2.exc+college1.exc+
                            ind_incLow+ind_incHigh+highinc35.exc+highinc2.exc+highinc1.exc+
                            ind_incNA+scale_all_rd1+med_scale_rd1+
                            scenario,data=dat_byround)


##Create table
stargazer(postdelib_prefs_simp, postdelib_prefs_all, postdelib_prefs_all_2, 
          se=list(coef(summary(postdelib_prefs_simp,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_prefs_all,cluster = c("jurynum")))[, 2],
                  coef(summary(postdelib_prefs_all_2,cluster = c("jurynum")))[, 2]),
          type="latex", out="Appendix Figures/postdelib_hisp.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= c("scenario", "ind_hisp", "ind_female", 
                    "ind_ageYoung", "ind_ageOld", "ind_eduHS", "ind_eduCD",
                    "ind_incLow", "ind_incHigh", "ind_incNA")
          , order=(var_order)
          ,covariate.labels = c("1 Hispanic", "0 Hispanics", 
                                "2 Men", "3+ Men", 
                                "1 Older", "2 Older", "3+ Older",  
                                "1 College Grad", "2 College Grad", "3 College Grad", "4+ College Grad",   
                                "1 High Income", "2 High Income", "3+ High Income",
                                "Individual Pre-delib. Preference" ,  "Jury Median Pre-delib. Preference")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Post-Deliberation Preference (t2)"),
          add.lines = list(c("Legal Case Fixed Effects?", "Yes", "Yes", "Yes"),
                           c("Indiv. Demographic Controls?", "Yes", "Yes", "Yes")),
          column.sep.width = "2pt", omit.table.layout = "n", no.space = T)


rm(postdelib_prefs_simp, postdelib_prefs_all, postdelib_prefs_all_2)

## Appendix Table A18: main text table 3 with Hispanic indicator ####

# round 1 selection model
heckit_withPM_controls_rd1_all<-heckit( round1_nothung~
                                          hisp0.inc+hisp1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+sd_scale_rd1,
                                        
                                        verdict_rd1~
                                          hisp0.inc+hisp1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+med_scale_rd1,data=group.dat_byround)

# round 2 selection model
heckit_withPM_controls_rd2_all<-heckit( round2_nothung~
                                          hisp0.inc+hisp1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+sd_scale_rd1,
                                        
                                        verdict_rd2~
                                          hisp0.inc+hisp1.inc+
                                          female12.inc+female3.inc+female4.inc+
                                          old35.inc+old2.inc+old1.inc+
                                          college45.inc+college3.inc+college2.inc+
                                          highinc45.inc+highinc3.inc+highinc2.inc+highinc1.inc+
                                          scenario+med_scale_rd1,data=group.dat_byround)


var_order_grp <- c("hisp1.inc", "hisp0.inc", "female4.inc", "female3.inc", "female12.inc", "old1.inc", "old2.inc", "old35.inc", "college45.inc", "college3.inc", "college2.inc", "college45.inc", "highinc1.inc", "highinc2.incc", "highinc3.inc", "highinc45.inc")


stargazer(heckit_withPM_controls_rd1_all,heckit_withPM_controls_rd2_all, 
          type="latex", selection.equation=FALSE, out="Appendix Figures/heckit_st2_hisp.tex", star.char = c("*", "**", "***"), star.cutoffs = c(.05, .01, .001)
          , omit= "scenario"
          , order=(var_order_grp)
          ,covariate.labels = c("1 Hispanic", "0 Hispanics", 
                                "2 Men", "3 Men", "4+ Men",
                                "1 Older", "2 Older", "3+ Older",
                                "2 College Grad", "3 College Grad", "4+ College Grad", 
                                "1 High Income", "2 High Income", "3 High Income", "4+ High Income", 
                                "Median Pre-Delib. Preference (t0)")
          , omit.stat=c("f", "ser"), dep.var.labels=c("Verdict (Round 1)", "Verdict (Round 2)")  ,
          single.row = F, column.sep.width = "2pt", omit.table.layout = "n", no.space = T
          
)

rm(heckit_withPM_controls_rd1_all, heckit_withPM_controls_rd2_all)

## Appendix Figure A7: distribution of real and simulated POC preferences ####

###Compute difference between Nonwhite and White Jurors prior to deliberation

# Create subsets of racially non-homogenous juries:
pre_del.dist <- pre_del %>% filter(totnwhite.inc != 0)
post_del.dist <- post_del %>% filter(totnwhite.inc != 0)

# calculate within-jury racial group means
graph.data_rating <- pre_del.dist %>% group_by(jurynum,ind_nonwhite) %>% summarize(mean = mean(iscale))
# store absolute differences in racial group means
graph.data_rating <- data.frame(abs(graph.data_rating$mean[graph.data_rating$ind_nonwhite==1] -
                                      graph.data_rating$mean[graph.data_rating$ind_nonwhite==0]))
names(graph.data_rating)[1]<-"diff"
# store mean absolute difference
race.diff.pun <- mean(graph.data_rating$diff,na.rm=T)

#repeat for dollars:
graph.data_dollar <- post_del.dist %>% group_by(jurynum,ind_nonwhite) %>% summarize(mean = mean(eight.doll))
graph.data_dollar <- data.frame(abs(graph.data_dollar$mean[graph.data_dollar$ind_nonwhite==1] -
                                      graph.data_dollar$mean[graph.data_dollar$ind_nonwhite==0]))
names(graph.data_dollar)[1]<-"diff"
race.diff.dol <- mean(graph.data_dollar$diff,na.rm=T)


#Create simulated difference distribution by randomly assigning jurors to be "nonwhite"
set.seed(27514)
# randomly sample new jurors to be white, 500 times
random.vals <- dat %>% ungroup() %>% dplyr::select(ind_nonwhite) %>% 
  rep_sample_n(size=2712, replace=F, reps=500)
# grab columns needed for calculations
relevant.data <- dat %>% dplyr::select(jurynum, iscale, eight.doll, ordernum)

# set up loop to calculate race differences for each jury:
i<-1
# create dataframe to store results
random.data<-data.frame(race.doll=numeric(500),gend.doll=numeric(500),
                        race.scale=integer(500),gend.scale=numeric(500))
random.data[random.data == 0] <- NA

# simulate juries 500 times:
  # THIS WILL TAKE A FEW MINUTES
for (i in 1:500) {
  # append simulated race assignments to data
  new.juries <- relevant.data
  new.juries <- cbind(relevant.data, random.vals %>% filter(replicate == i))
  # calculate new race composition
  new.juries <- new.juries %>% group_by(jurynum) %>% mutate(totnwhite.inc=sum(ind_nonwhite)) %>% ungroup()
  # subset new juries into pre/post deliberation
  nj.pre.del<-subset.data.frame(new.juries,ordernum=="b")
  nj.post.del<-subset.data.frame(new.juries,ordernum=="a")
  # select only juries with at least 1 nonwhite member
  nj.pre.del.race <- nj.pre.del %>% filter(totnwhite.inc != 0)
  nj.post.del.race <- nj.post.del %>% filter(totnwhite.inc != 0)
  # calculate within-jury race means
  temp <- nj.pre.del.race %>% group_by(jurynum,ind_nonwhite) %>% summarize(mean = mean(iscale))
  # calculate mean difference for scale and dollars
  random.data$race.scale[i]<-mean(abs(temp$mean[temp$ind_nonwhite==1]-
                                        temp$mean[temp$ind_nonwhite==0]),na.rm=T)
  temp <- nj.pre.del.race %>% group_by(jurynum,ind_nonwhite) %>% summarize(mean = mean(eight.doll))
  random.data$race.doll[i] <- mean(abs(temp$mean[temp$ind_nonwhite==1]-
                                         temp$mean[temp$ind_nonwhite==0]),na.rm=T)
}

# And now the figures:
plot1_race <- ggplot(random.data,aes(x=race.scale))+
  geom_histogram(bins=100)+
  theme_bw()+
  labs(x="Absolute Value of the Within-Jury Difference between White and POC Jurors",y="Count",
       title="Rating")+
  geom_vline(xintercept = race.diff.pun, color="red")+
  xlim(c(1.0,2.1))

#calculate percentile position of true difference
ecdf(random.data$race.scale)(race.diff.pun)


plot2_race <- ggplot(random.data,aes(x=race.doll))+
  geom_histogram(bins=100)+
  theme_bw()+
  labs(x="Absolute Value of the Within-Jury Difference between White and POC Jurors",y="Count",
       title="Dollars")+
  geom_vline(xintercept = race.diff.dol, color="red")

# calculate percentile position of true difference
ecdf(random.data$race.doll)(race.diff.dol)

# combine plots and save
plotboth_race <- grid.arrange(plot1_race,plot2_race, ncol=2,
                              top = textGrob("Distribution of Inter-Group Cleavages, Randomly Assigned vs. Actual POC Jurors",
                                             gp=gpar(fontsize=20,font=3)))
plotboth_race
ggsave("Appendix Figures/plotboth_race.pdf", plotboth_race, width=14,height=10)


rm(plot1_race,plot2_race,plotboth_race, new.juries, nj.post.del, nj.post.del.race, 
   nj.pre.del, nj.pre.del.race, random.data, random.vals, relevant.data, temp, 
   graph.data_dollar, graph.data_rating)

## Appendix Figure A8: representation figure by round and scale ####

# Code below is similar to main text code section "create datasets for each group: actual and placebo race"
  # with the addition of code to create plots for each facet of the actual data

# Subset to 1-nonwhite juries:
pre_del.hetr <- pre_del %>% filter(totnwhite.inc==1)
post_del.hetr <- post_del %>% filter(totnwhite.inc==1)

#grab all-white juries
pre_del.homog <- pre_del %>% filter(totnwhite.inc==0)
post_del.homog <- post_del %>% filter(totnwhite.inc==0)

#create df of between-race differences for each order/scale:

#dollar deliberations, dollar first juries:
# calculate differences between race groups
dr1 <- post_del.hetr %>% 
  group_by(jurynum,ind_nonwhite,jury.eight,scenario) %>% #group by jury, race; keep jury dollar decision, scenario
  summarize(mean = mean(eight.doll,na.rm=T)) #find mean of dollar prefs
#grab only whites
dr1.white <- dr1 %>% filter(ind_nonwhite==0)
#grab only nonwhites
dr1.nonwhite <- dr1 %>% filter(ind_nonwhite==1)
#merge whites and nonwhites by jury
#create variable for differences between white average and nonwhite
dr1 <- left_join(dr1.white,dr1.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
rm(dr1.nonwhite, dr1.white)
#find absolute difference
dr1 <- dr1 %>% mutate(diff=abs(diff))
#recode NaNs to NAs
dr1$diff[is.nan(dr1$diff)]<-NA
dr1$mean.x[is.nan(dr1$mean.x)]<-NA
dr1$mean.y[is.nan(dr1$mean.y)]<-NA

# repeat for dollar delib, rating first groups:
dr2 <- pre_del.hetr %>% group_by(jurynum,ind_nonwhite,jury.eight,scenario) %>% 
  summarize(mean = mean(eight.doll,na.rm=T))
dr2.white <- dr2 %>% filter(ind_nonwhite==0)
dr2.nonwhite <- dr2 %>% filter(ind_nonwhite==1)
dr2 <- left_join(dr2.white,dr2.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
rm(dr2.nonwhite, dr2.white)

dr2 <- dr2 %>% mutate(diff=abs(diff))
dr2$diff[is.nan(dr2$diff)]<-NA
dr2$mean.x[is.nan(dr2$mean.x)]<-NA
dr2$mean.y[is.nan(dr2$mean.y)]<-NA

# repeat for ratings, ratings first
pr1 <- pre_del.hetr %>% group_by(jurynum,ind_nonwhite,jryscale,scenario) %>% 
  summarize(mean = mean(iscale,na.rm=T))
pr1.white <- pr1 %>% filter(ind_nonwhite==0)
pr1.nonwhite <- pr1 %>% filter(ind_nonwhite==1)
pr1 <- left_join(pr1.white,pr1.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
rm(pr1.white, pr1.nonwhite)

pr1 <- pr1 %>% mutate(diff=abs(diff))
pr1$diff[is.nan(pr1$diff)]<-NA
pr1$mean.x[is.nan(pr1$mean.x)]<-NA
pr1$mean.y[is.nan(pr1$mean.y)]<-NA

#repeat for ratings, dollars first
pr2 <- post_del.hetr %>% group_by(jurynum,ind_nonwhite,jryscale,scenario) %>% 
  summarize(mean = mean(iscale,na.rm=T))
pr2.white <- pr2 %>% filter(ind_nonwhite==0)
pr2.nonwhite <- pr2 %>% filter(ind_nonwhite==1)
pr2 <- left_join(pr2.white,pr2.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
rm(pr2.nonwhite, pr2.white)

pr2 <- pr2 %>% mutate(diff=abs(diff))
pr2$diff[is.nan(pr2$diff)]<-NA
pr2$mean.x[is.nan(pr2$mean.x)]<-NA
pr2$mean.y[is.nan(pr2$mean.y)]<-NA

# create dataframes for each group

dr1.a <- dr1 %>%
  #restrict to juries where white-nonwhite diff at least 1.5
  filter(diff>=1.5 & is.na(jury.eight.x)==F) %>%   
  #calculate diff from jury mean for whites and nonwhites
  mutate(diff.white = abs(mean.x-jury.eight.x),
         diff.nonwhite = abs(mean.y-jury.eight.x),
         #calculate diff between whites and nonwhites' differences from mean
         diff.diff = diff.nonwhite-diff.white)
#get median of diff b/w white and nonwhite diffs 
doll_rd1_diff <- median(dr1.a$diff.diff, na.rm = T)
#get p-value of t-test for diff b/w white and nonwhite diffs
label_dr1.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(dr1.a$diff.white,dr1.a$diff.nonwhite)$p.value)
#plot distribution of white diffs, nonwhite diffs
plot2<-ggplot(data=dr1.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 6, y = 0.2, label = label_dr1.a, parse = TRUE, size=5) +
  labs(x="Diff. Racial Group Mean and Jury Decision",y="Density",
       title="Dollars, Round 1")+
  theme_bw()

dr2.a <- dr2 %>% 
  filter(diff>=1.5 & is.na(jury.eight.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jury.eight.x),
         diff.nonwhite = abs(mean.y-jury.eight.x),
         diff.diff = diff.nonwhite-diff.white)
doll_rd2_diff <- median(dr2.a$diff.diff)
label_dr2.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(dr2.a$diff.white,dr2.a$diff.nonwhite)$p.value)
plot4<-ggplot(data=dr2.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 5, y = 0.3, label = label_dr2.a, parse = TRUE, size=5) +
  labs(x="Diff. Racial Group Mean and Jury Decision",y="Density",
       title="Dollars, Round 2")+
  theme_bw()


pr1.a <- pr1 %>% 
  filter(diff>=1.5 & is.na(jryscale.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jryscale.x),
         diff.nonwhite = abs(mean.y-jryscale.x),
         diff.diff = diff.nonwhite-diff.white)
rating_rd1_diff <- median(pr1.a$diff.diff)
label_pr1.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(pr1.a$diff.white,pr1.a$diff.nonwhite)$p.value)
plot6<-ggplot(data=pr1.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 4.5, y = 0.5, label = label_pr1.a, parse = TRUE, size=5) +
  labs(x="Diff. Racial Group Mean and Jury Decision",y="Density",
       title="Ratings, Round 1")+
  theme_bw()

pr2.a <- pr2 %>% 
  filter(diff>=1.5 & is.na(jryscale.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jryscale.x),
         diff.nonwhite = abs(mean.y-jryscale.x),
         diff.diff = diff.nonwhite-diff.white)
label_pr2.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(pr2.a$diff.white,pr2.a$diff.nonwhite)$p.value)
plot8<-ggplot(data=pr2.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 3.5, y = 0.75, label = label_pr2.a, parse = TRUE, size=5) +
  labs(x="Diff. Racial Group Mean and Jury Decision",y="Density",
       title="Ratings, Round 2")+
  theme_bw()

#combine and save out 
dr1.a$round <- 1
dr1.a$type <- "dollar"
dr2.a$round <- 2
dr2.a$type <- "dollar"
pr1.a$round <- 1
pr1.a$type <- "rating"
pr2.a$round <- 2
pr2.a$type <- "rating"

nonwhite_rep_actual <- rbind(dr1.a, dr2.a, pr1.a, pr2.a)
nonwhite_rep_actual$nonwhite <- 1

set.seed(9904)

#within juries, randomly assign one nonwhite juror
post_del.homog <- within(post_del.homog, {
  pseudo_id <- block_ra(blocks = post_del.homog$jurynum, prob_each = c(.83, .17))
})
pre_del.homog <- within(pre_del.homog, {
  pseudo_id <- block_ra(blocks = pre_del.homog$jurynum, prob_each = c(.83, .17))
})


##Calculate raw differences, then abs. val for each group

#Dollar Scale; Dollar-first (rd 1):
dr1_placebo <- post_del.homog %>% group_by(jurynum,pseudo_id,jury.eight,scenario) %>% 
  summarize(mean = mean(eight.doll,na.rm=T))
dr1_placebo.white <- dr1_placebo %>% filter(pseudo_id==0)
dr1_placebo.nonwhite <- dr1_placebo %>% filter(pseudo_id==1)
dr1_placebo <- left_join(dr1_placebo.white,dr1_placebo.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
dr1_placebo <- dr1_placebo %>% mutate(diff=abs(diff))

dr1_placebo.a <- dr1_placebo %>%
  filter(diff>=1.5 & is.na(jury.eight.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jury.eight.x),
         diff.nonwhite = abs(mean.y-jury.eight.x),
         diff.diff = diff.nonwhite-diff.white)

#Dollar Scale; Rating-first (rd 2):
dr2_placebo <- pre_del.homog %>% group_by(jurynum,pseudo_id,jury.eight,scenario) %>% 
  summarize(mean = mean(eight.doll,na.rm=T))
dr2_placebo.white <- dr2_placebo %>% filter(pseudo_id==0)
dr2_placebo.nonwhite <- dr2_placebo %>% filter(pseudo_id==1)
dr2_placebo <- left_join(dr2_placebo.white,dr2_placebo.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
dr2_placebo <- dr2_placebo %>% mutate(diff=abs(diff))

dr2_placebo.a <- dr2_placebo %>% 
  filter(diff>=1.5 & is.na(jury.eight.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jury.eight.x),
         diff.nonwhite = abs(mean.y-jury.eight.x),
         diff.diff = diff.nonwhite-diff.white)

#Rating Scale; Rating-first (rd 1):
pr1_placebo <- pre_del.homog %>% group_by(jurynum,pseudo_id,jryscale,scenario) %>% 
  summarize(mean = mean(iscale,na.rm=T))
pr1_placebo.white <- pr1_placebo %>% filter(pseudo_id==0)
pr1_placebo.nonwhite <- pr1_placebo %>% filter(pseudo_id==1)
pr1_placebo <- left_join(pr1_placebo.white,pr1_placebo.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
pr1_placebo <- pr1_placebo %>% mutate(diff=abs(diff))

pr1_placebo.a <- pr1_placebo %>% 
  filter(diff>=1.5 & is.na(jryscale.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jryscale.x),
         diff.nonwhite = abs(mean.y-jryscale.x),
         diff.diff = diff.nonwhite-diff.white)

#Rating Scale; Dollar-first (rd 2):
pr2_placebo <- post_del.homog %>% group_by(jurynum,pseudo_id,jryscale,scenario) %>% 
  summarize(mean = mean(iscale,na.rm=T))
pr2_placebo.white <- pr2_placebo %>% filter(pseudo_id==0)
pr2_placebo.nonwhite <- pr2_placebo %>% filter(pseudo_id==1)
pr2_placebo <- left_join(pr2_placebo.white,pr2_placebo.nonwhite,by="jurynum") %>% mutate(diff = mean.x-mean.y)
pr2_placebo <- pr2_placebo %>% mutate(diff=abs(diff))

pr2_placebo.a <- pr2_placebo %>% 
  filter(diff>=1.5 & is.na(jryscale.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jryscale.x),
         diff.nonwhite = abs(mean.y-jryscale.x),
         diff.diff = diff.nonwhite-diff.white)


placebo_rep <- rbind(dr1_placebo.a, dr2_placebo.a, pr1_placebo.a, pr2_placebo.a)
rm(dr1_placebo.white, dr2_placebo.white, pr1_placebo.white, pr2_placebo.white,
   dr1_placebo.nonwhite, dr2_placebo.nonwhite, pr1_placebo.nonwhite, pr2_placebo.nonwhite)

# code from here is added

## create figures: placebo ##

#same as real race, but with randomly-chosen jurors as nonwhite
dr1_placebo.a <- dr1_placebo %>%
  filter(diff>=1.5 & is.na(jury.eight.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jury.eight.x),
         diff.nonwhite = abs(mean.y-jury.eight.x),
         diff.diff = diff.nonwhite-diff.white)
label_dr1_placebo.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(dr1_placebo.a$diff.white,dr1_placebo.a$diff.nonwhite)$p.value)
plot2_placebo<-ggplot(data=dr1_placebo.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 6, y = 0.2, label = label_dr1_placebo.a, parse = TRUE, size=5) +
  labs(x="Diff. Group Mean and Jury Decision",y="Density",
       title="Placebo, Dollars, Round 1")+
  theme_bw()

##Dollar Scale; Punishment-first (rd 2); 1.5:
dr2_placebo.a <- dr2_placebo %>% 
  filter(diff>=1.5 & is.na(jury.eight.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jury.eight.x),
         diff.nonwhite = abs(mean.y-jury.eight.x),
         diff.diff = diff.nonwhite-diff.white)
label_dr2_placebo.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(dr2_placebo.a$diff.white,dr2_placebo.a$diff.nonwhite)$p.value)
plot4_placebo<-ggplot(data=dr2_placebo.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 5, y = 0.3, label = label_dr2_placebo.a, parse = TRUE, size=5) +
  labs(x="Diff. Group Mean and Jury Decision",y="Density",
       title="Placebo, Dollars, Round 2")+
  theme_bw()

##Punishment Scale; Punishment-first (rd 1); 1.5:
pr1_placebo.a <- pr1_placebo %>% 
  filter(diff>=1.5 & is.na(jryscale.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jryscale.x),
         diff.nonwhite = abs(mean.y-jryscale.x),
         diff.diff = diff.nonwhite-diff.white)
label_pr1_placebo.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(pr1_placebo.a$diff.white,pr1_placebo.a$diff.nonwhite)$p.value)
plot6_placebo<-ggplot(data=pr1_placebo.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 4, y = 0.4, label = label_pr1_placebo.a, parse = TRUE, size=5) +
  labs(x="Diff. Group Mean and Jury Decision",y="Density",
       title="Placebo, Ratings, Round 1")+
  theme_bw()

##Punishment Scale; Dollar-first (rd 2); 1.5:
pr2_placebo.a <- pr2_placebo %>% 
  filter(diff>=1.5 & is.na(jryscale.x)==F) %>%   
  mutate(diff.white = abs(mean.x-jryscale.x),
         diff.nonwhite = abs(mean.y-jryscale.x),
         diff.diff = diff.nonwhite-diff.white)
pval_pr2_placebo.a <-t.test(pr2_placebo.a$diff.white,pr2_placebo.a$diff.nonwhite)$p.value 
label_pr2_placebo.a <- sprintf(
  "italic(p[gap]) == %.2g",
  t.test(pr2_placebo.a$diff.white,pr2_placebo.a$diff.nonwhite)$p.value)
plot8_placebo<-ggplot(data=pr2_placebo.a)+
  geom_density(aes(diff.white),color="red",fill="red",alpha=0.2)+
  geom_density(aes(diff.nonwhite),color="blue",fill="blue",alpha=0.2)+
  annotate("text", x = 4, y = 0.5, label = label_pr2_placebo.a, parse = TRUE, size=5) +
  labs(x="Diff. Group Mean and Jury Decision",
       y="Density",
       title="Placebo, Ratings, Round 2")+
  theme_bw()

##Combining and Printing the Plots:

pdf("Appendix Figures/racerepfull_all.pdf")
grid.arrange(plot6,plot6_placebo,plot8,plot8_placebo,plot2,plot2_placebo,plot4,plot4_placebo, 
             ncol=2,
             top = textGrob("Representation in Final Decisions, Placebo and Actual POC Jurors"))
dev.off()



## Appendix Figure A9: 
## Appendix Figures A9 and A10 are produced in maintext.R ####
## Appendix Table 19: dissenter characteristics ####

#select relevant columns: scale
diss_a <- dat %>% select(case_id, jurynum, iscale, jryscale, 
                         income, scenario, 
                         ind_nonwhite, ind_female, ind_ageYoung, ind_ageMiddle,  
                         ind_eduHS, ind_eduSC, ind_incLow, ind_incMiddle) %>%
  rename("pref"=iscale, "verdict"=jryscale)
#select relevant columns: dollar
diss_b <- dat %>% select(case_id, jurynum, 
                         income, scenario, eight.doll, jury.eight, 
                         ind_nonwhite, ind_female, ind_ageYoung, ind_ageMiddle,  
                         ind_eduHS, ind_eduSC, ind_incLow, ind_incMiddle) %>%
  mutate("pref"=eight.doll, "verdict"=jury.eight)
#combine 
diss <- bind_rows(diss_a, diss_b, .id="measure") %>%
  mutate(measure = case_when(measure==1~"scale",
                             measure==2~"doll"))
rm(diss_a, diss_b)

#remove jurors missing preferences for each measure
diss <- diss %>% filter(!is.na(pref))

# for each juror...
i <-1
diss$side[i] <- NA
for(i in 1:nrow(diss)){
  #grab the others on their jury
  others <- diss$pref[diss$jurynum==diss$jurynum[i]&diss$case_id!=diss$case_id[i]&diss$measure==diss$measure[i]]
  #note the focal juror's preference
  own <- diss$pref[i]
  #calculate the other jurors' mean preference
  diss$others_mean[i] <- mean(others, na.rm=T)
  #define dissenters as those whose pref is more than 1.5 away from the mean
  diss$dissenter[i] <- abs(own - mean(others, na.rm=T))>1.5
  #note whether preference is unique within jury
  diss$unique[i] <- !(own %in% others)
  #note whether preference is outside range of others in jury
  diss$outside_range[i] <- (own>max(others))|(own<min(others))
  #note which side of the mean dissenters' preferences fall on
  if(diss$dissenter[i]==TRUE){
    diss$side[i] <- ifelse(own>mean(others, na.rm=T), "higher", 
                           ifelse(own<mean(others, na.rm=T), "lower", NA))
  }
}

#for each jury and measure, count the number of dissenters total and on each side
diss <- diss %>%
  group_by(jurynum, measure) %>%
  mutate(n_dissenters = sum(dissenter),
         n_higher = sum(side=="higher", na.rm=T),
         n_lower = sum(side=="lower", na.rm=T))

#count the number of dissenters on same side of mean as focal juror
diss <- diss %>%
  mutate(allies = case_when(side=="higher"~n_higher - 1, 
                            side=="lower"~n_lower - 1)) 

#summarize characteristics and print into latex table
diss %>% 
  group_by(dissenter, ind_nonwhite) %>%
  summarize(unique = mean(unique),
            outside_range = mean(outside_range),
            dissenters = mean(n_dissenters),
            allies = mean(allies),
            pct_f = mean(ind_female),
            pct_young = mean(ind_ageYoung),
            pct_midage = mean(ind_ageMiddle),
            pct_hs = mean(ind_eduHS),
            pct_sc = mean(ind_eduSC),
            pct_inclow = mean(ind_incLow),
            pct_incmid = mean(ind_incMiddle)) %>%
  round(2) %>%
  t() %>%
  stargazer(summary=FALSE, digits = 2)
